In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import os
import seaborn as sns
import warnings
from IPython.display import display # For styled pandas tables
In [2]:
# Set style for publication-quality plots
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300

# Define paths
data_path = "/Users/valar/Documents/Projects/MRI-TAVI-Cytokines/data"
results_path = "/Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results"

# Read the data files
clinical_df = pd.read_csv(f"{data_path}/MRI-TAVI_Clinical.tsv", sep='\t')
mri_df = pd.read_csv(f"{data_path}/MRI-TAVI_Readings.tsv", sep='\t')
cytokines_df = pd.read_csv(f"{data_path}/MRI-TAVI-Jane-Cytokines.tsv", sep='\t')

# Remove ST11 from all dataframes
clinical_df = clinical_df[clinical_df['Subject'] != 'ST11'].copy()
mri_df = mri_df[mri_df['Subject'] != 'ST11'].copy()
cytokines_df = cytokines_df[cytokines_df['Subject'] != 'ST11'].copy()

# Reset indices
clinical_df.reset_index(drop=True, inplace=True)
mri_df.reset_index(drop=True, inplace=True)
cytokines_df.reset_index(drop=True, inplace=True)

# Verify the filtering worked
print("Data loaded and ST11 removed:")
print(f"Clinical data: {clinical_df.shape[0]} patients")
print(f"MRI data: {mri_df.shape[0]} patients")
print(f"Cytokines data: {cytokines_df.shape[0]} observations (should be 40 = 10 patients × 4 timepoints)")
print(f"\nPatients included: {sorted(clinical_df['Subject'].tolist())}")
Data loaded and ST11 removed:
Clinical data: 10 patients
MRI data: 10 patients
Cytokines data: 40 observations (should be 40 = 10 patients × 4 timepoints)

Patients included: ['ST01', 'ST03', 'ST04', 'ST05', 'ST14', 'ST20', 'ST21', 'ST22', 'ST23', 'ST24']

Step 1: Clinical Summary Table¶

In [3]:
# Calculate summary statistics
n_patients = len(clinical_df)

# Age statistics
age_mean = clinical_df['Age'].mean()
age_std = clinical_df['Age'].std()
age_min = clinical_df['Age'].min()
age_max = clinical_df['Age'].max()

# Sex distribution
sex_counts = clinical_df['Sex'].value_counts()
n_male = sex_counts.get('M', 0)
n_female = sex_counts.get('F', 0)
pct_male = (n_male / n_patients) * 100
pct_female = (n_female / n_patients) * 100

# CFS statistics (median and IQR)
cfs_median = clinical_df['CFS'].median()
cfs_q1 = clinical_df['CFS'].quantile(0.25)
cfs_q3 = clinical_df['CFS'].quantile(0.75)

# ACE-3 statistics (mean and range)
# Note: There might be missing values
ace3_mean = clinical_df['ACE-3'].mean()
ace3_min = clinical_df['ACE-3'].min()
ace3_max = clinical_df['ACE-3'].max()
ace3_missing = clinical_df['ACE-3'].isna().sum()

# Create the summary table
summary_data = {
    'Characteristic': [
        'N (participants)',
        'Age (mean ± SD)',
        'Sex (M/F)',
        'CFS (median [IQR])',
        'ACE-3 (mean [range])'
    ],
    'Value': [
        f'{n_patients}',
        f'{age_mean:.1f} years (range {age_min}–{age_max})',
        f'{n_male} M ({pct_male:.0f}%) / {n_female} F ({pct_female:.0f}%)',
        f'{cfs_median:.0f} [{cfs_q1:.0f}–{cfs_q3:.0f}]',
        f'{ace3_mean:.1f} [{ace3_min:.0f}–{ace3_max:.0f}]' if ace3_missing == 0 else f'{ace3_mean:.1f} [{ace3_min:.0f}–{ace3_max:.0f}] (n={n_patients - ace3_missing})'
    ]
}

table1_df = pd.DataFrame(summary_data)

# Display the table
print("\n" + "="*80)
print("TABLE 1: Summary Statistics for Study Cohort (N = 10)")
print("="*80)
print(table1_df.to_string(index=False))
print("="*80)

# Save the table to CSV
table1_df.to_csv(f"{results_path}/Table1_Clinical_Summary.csv", index=False)
print(f"\nTable saved to: {results_path}/Table1_Clinical_Summary.csv")

# Also show detailed breakdown for verification
print("\n" + "="*80)
print("DETAILED BREAKDOWN FOR VERIFICATION")
print("="*80)
print(f"\nAge: Mean={age_mean:.2f}, SD={age_std:.2f}, Range=[{age_min}-{age_max}]")
print(f"Sex: Male={n_male}, Female={n_female}")
print(f"CFS: Median={cfs_median}, Q1={cfs_q1}, Q3={cfs_q3}")
print(f"ACE-3: Mean={ace3_mean:.2f}, Min={ace3_min}, Max={ace3_max}, Missing={ace3_missing}")
print(f"\nIndividual patient data:")
print(clinical_df[['Subject', 'Age', 'Sex', 'CFS', 'ACE-3']].to_string(index=False))
================================================================================
TABLE 1: Summary Statistics for Study Cohort (N = 10)
================================================================================
      Characteristic                    Value
    N (participants)                       10
     Age (mean ± SD) 80.6 years (range 76–88)
           Sex (M/F)    6 M (60%) / 4 F (40%)
  CFS (median [IQR])                  4 [4–5]
ACE-3 (mean [range])       88.9 [65–99] (n=9)
================================================================================

Table saved to: /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/Table1_Clinical_Summary.csv

================================================================================
DETAILED BREAKDOWN FOR VERIFICATION
================================================================================

Age: Mean=80.60, SD=3.69, Range=[76-88]
Sex: Male=6, Female=4
CFS: Median=4.0, Q1=4.0, Q3=4.75
ACE-3: Mean=88.89, Min=65.0, Max=99.0, Missing=1

Individual patient data:
Subject  Age Sex  CFS  ACE-3
   ST01   83   M    4   97.0
   ST03   88   F    5   65.0
   ST04   77   M    4    NaN
   ST05   80   M    4   94.0
   ST14   85   M    5   91.0
   ST20   79   F    5   71.0
   ST21   79   M    3   91.0
   ST22   80   F    4   97.0
   ST23   76   M    2   99.0
   ST24   79   F    4   95.0

Step 2: Summary Data Plots¶

Panel plot + single plots¶

In [6]:
# Set publication-quality style
plt.rcParams.update({
    'font.size': 12,
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
    'axes.labelsize': 13,
    'axes.titlesize': 14,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
    'legend.fontsize': 11,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight',
    'axes.linewidth': 1.2,
})

# Define CONSISTENT color palette
color_main = '#2E86AB'  # Professional blue
color_accent = '#A23B72'  # Purple/magenta
color_male = '#70A9A1'  # Teal
color_female = '#CA6680'  # Rose
color_cortical = '#2E86AB'  # Blue
color_subcortical = '#A23B72'  # Purple
color_cerebellar = '#70A9A1'  # Teal

# Create Summary subfolder
summary_path = f"{results_path}/Summary"
os.makedirs(summary_path, exist_ok=True)
print(f"Created/verified folder: {summary_path}\n")

# Set random seed for reproducibility
np.random.seed(42)

# ============================================================================
# HELPER FUNCTION TO CREATE EACH PLOT
# ============================================================================

def plot_age_distribution(ax):
    """Plot age distribution histogram"""
    ages = clinical_df['Age'].values
    ax.hist(ages, bins=6, color=color_main, alpha=0.7, edgecolor='black', linewidth=1.2)
    ax.axvline(ages.mean(), color=color_accent, linestyle='--', linewidth=2.5, 
               label=f'Mean: {ages.mean():.1f} years')
    ax.set_xlabel('Age (years)', fontweight='bold')
    ax.set_ylabel('Frequency', fontweight='bold')
    ax.set_title('Age Distribution', fontweight='bold', pad=20)
    ax.legend(frameon=False, loc='upper right')
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.grid(False)

def plot_sex_distribution(ax):
    """Plot sex distribution bar chart"""
    sex_counts = clinical_df['Sex'].value_counts()
    n_male = sex_counts.get('M', 0)
    n_female = sex_counts.get('F', 0)
    total = len(clinical_df)
    pct_male = (n_male / total) * 100
    pct_female = (n_female / total) * 100
    
    bars = ax.bar(['Male', 'Female'], [n_male, n_female], 
                   color=[color_male, color_female], 
                   edgecolor='black', linewidth=1.2, alpha=0.8)
    ax.set_ylabel('Number of Patients', fontweight='bold')
    ax.set_title('Sex Distribution', fontweight='bold', pad=25)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_ylim(0, max(n_male, n_female) + 2.5)
    ax.grid(False)
    
    # Add percentage labels on bars
    for i, (bar, count, pct) in enumerate(zip(bars, [n_male, n_female], [pct_male, pct_female])):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + 0.3,
                 f'n={int(count)}\n({pct:.0f}%)',
                 ha='center', va='bottom', fontweight='bold', fontsize=12)

def plot_cfs_distribution(ax):
    """Plot CFS distribution bar chart"""
    cfs_data = clinical_df['CFS'].values
    cfs_counts = pd.Series(cfs_data).value_counts().sort_index()
    
    bars = ax.bar(cfs_counts.index, cfs_counts.values, color=color_main, 
                  alpha=0.7, edgecolor='black', linewidth=1.2, width=0.6)
    
    ax.set_xlabel('CFS Score', fontweight='bold')
    ax.set_ylabel('Frequency', fontweight='bold')
    ax.set_title('Clinical Frailty Scale', fontweight='bold', pad=20)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.grid(False)
    ax.yaxis.set_major_locator(plt.MaxNLocator(integer=True))
    ax.set_xticks(cfs_counts.index)

def plot_ace3_distribution(ax):
    """Plot ACE-3 violin plot"""
    ace3_data = clinical_df['ACE-3'].dropna().values
    
    # Create violin plot
    parts = ax.violinplot([ace3_data], positions=[0], widths=0.7,
                            showmeans=False, showmedians=False, showextrema=False)
    for pc in parts['bodies']:
        pc.set_facecolor(color_accent)
        pc.set_alpha(0.7)
        pc.set_edgecolor('black')
        pc.set_linewidth(1.5)
    
    # Add individual points with jitter
    jitter = np.random.normal(0, 0.04, size=len(ace3_data))
    ax.scatter(jitter, ace3_data, alpha=0.6, s=80, color='#6B1E55', 
               edgecolors='black', linewidth=0.8, zorder=3)
    
    # Add mean line
    mean_val = np.mean(ace3_data)
    ax.hlines(mean_val, -0.25, 0.25, colors='#FF6B35', linewidth=3.5, zorder=4, 
              label=f'Mean: {mean_val:.1f}')
    
    ax.set_ylabel('ACE-3 Score', fontweight='bold')
    ax.set_title('Cognitive Function (ACE-3)', fontweight='bold', pad=20)
    ax.set_xticks([])
    ax.set_xlim(-0.5, 0.5)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.legend(frameon=False, loc='upper right')
    ax.grid(False)

def plot_infarct_burden(ax):
    """Plot infarct burden histogram with 1-unit bins"""
    infarct_counts = mri_df['POST_Number of new acute infarcts'].values
    
    # Create 1-unit bins
    bins = np.arange(-0.5, max(infarct_counts) + 1.5, 1)
    n, bins_result, patches = ax.hist(infarct_counts, bins=bins, color=color_accent, alpha=0.7, 
                                      edgecolor='black', linewidth=1.2)
    
    ax.set_xlabel('Number of New Infarcts', fontweight='bold')
    ax.set_ylabel('Frequency', fontweight='bold')
    ax.set_title('Post-TAVI Infarct Burden', fontweight='bold', pad=20)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.grid(False)
    ax.yaxis.set_major_locator(plt.MaxNLocator(integer=True))
    ax.xaxis.set_major_locator(plt.MaxNLocator(integer=True))

def plot_infarct_locations(ax):
    """Plot infarct location stacked bar chart"""
    cortical = mri_df['POST_New supratentorial infarcts - cortical (number)'].values
    subcortical = mri_df['POST_New supratentorial infarcts - subcortical (number)'].values
    cerebellar = mri_df['POST_New Infarcts - Cerebellum (number)'].values
    
    subjects = mri_df['Subject'].values
    x_pos = np.arange(len(subjects))
    
    p1 = ax.bar(x_pos, cortical, color=color_cortical, edgecolor='black', 
                linewidth=1, label='Cortical')
    p2 = ax.bar(x_pos, subcortical, bottom=cortical, color=color_subcortical, 
                 edgecolor='black', linewidth=1, label='Subcortical')
    p3 = ax.bar(x_pos, cerebellar, bottom=cortical+subcortical, color=color_cerebellar,
                 edgecolor='black', linewidth=1, label='Cerebellar')
    
    ax.set_xlabel('Patient', fontweight='bold')
    ax.set_ylabel('Number of Infarcts', fontweight='bold')
    ax.set_title('Infarct Distribution by Location', fontweight='bold', pad=20)
    ax.set_xticks(x_pos)
    ax.set_xticklabels(subjects, rotation=60, ha='right', fontsize=11)
    ax.legend(frameon=False, loc='upper left', fontsize=11)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.grid(False)

# ============================================================================
# GENERATE INDIVIDUAL FIGURES
# ============================================================================
print("Generating individual figures...")

# Figure 1: Age Distribution
fig1, ax1 = plt.subplots(figsize=(8, 6))
plot_age_distribution(ax1)
plt.tight_layout()
plt.savefig(f'{summary_path}/Fig1_Age_Distribution.png', dpi=300, bbox_inches='tight')
print("Saved: Fig1_Age_Distribution")
plt.show()

# Figure 2: Sex Distribution
fig2, ax2 = plt.subplots(figsize=(8, 6))
plot_sex_distribution(ax2)
plt.tight_layout()
plt.savefig(f'{summary_path}/Fig2_Sex_Distribution.png', dpi=300, bbox_inches='tight')
print("Saved: Fig2_Sex_Distribution")
plt.show()

# Figure 3: CFS Distribution
fig3, ax3 = plt.subplots(figsize=(8, 6))
plot_cfs_distribution(ax3)
plt.tight_layout()
plt.savefig(f'{summary_path}/Fig3_CFS_Distribution.png', dpi=300, bbox_inches='tight')
print("Saved: Fig3_CFS_Distribution")
plt.show()

# Figure 4: ACE-3 Distribution
np.random.seed(42)  # Reset seed for consistency
fig4, ax4 = plt.subplots(figsize=(8, 6))
plot_ace3_distribution(ax4)
plt.tight_layout()
plt.savefig(f'{summary_path}/Fig4_ACE3_Distribution.png', dpi=300, bbox_inches='tight')
print("Saved: Fig4_ACE3_Distribution")
plt.show()

# Figure 5: Infarct Burden
fig5, ax5 = plt.subplots(figsize=(8, 6))
plot_infarct_burden(ax5)
plt.tight_layout()
plt.savefig(f'{summary_path}/Fig5_Infarct_Burden.png', dpi=300, bbox_inches='tight')
print("Saved: Fig5_Infarct_Burden")
plt.show()

# Figure 6: Infarct Locations
fig6, ax6 = plt.subplots(figsize=(10, 6))
plot_infarct_locations(ax6)
plt.tight_layout()
plt.savefig(f'{summary_path}/Fig6_Infarct_Locations.png', dpi=300, bbox_inches='tight')
print("Saved: Fig6_Infarct_Locations")
plt.show()

# ============================================================================
# GENERATE COMBINED PANEL FIGURE
# ============================================================================
print("\nGenerating combined panel figure...")

# Create figure with 2x3 grid
fig_combined = plt.figure(figsize=(18, 12))
gs = GridSpec(2, 3, figure=fig_combined, hspace=0.4, wspace=0.3)

# Panel 1: Age Distribution
ax_panel1 = fig_combined.add_subplot(gs[0, 0])
plot_age_distribution(ax_panel1)

# Panel 2: Sex Distribution
ax_panel2 = fig_combined.add_subplot(gs[0, 1])
plot_sex_distribution(ax_panel2)

# Panel 3: CFS Distribution
ax_panel3 = fig_combined.add_subplot(gs[0, 2])
plot_cfs_distribution(ax_panel3)

# Panel 4: ACE-3 Distribution
np.random.seed(42)  # Reset seed for consistency
ax_panel4 = fig_combined.add_subplot(gs[1, 0])
plot_ace3_distribution(ax_panel4)

# Panel 5: Infarct Burden
ax_panel5 = fig_combined.add_subplot(gs[1, 1])
plot_infarct_burden(ax_panel5)

# Panel 6: Infarct Locations
ax_panel6 = fig_combined.add_subplot(gs[1, 2])
plot_infarct_locations(ax_panel6)

plt.tight_layout()
plt.savefig(f'{summary_path}/Figure_Combined_Cohort_Summary.png', dpi=300, bbox_inches='tight')
print("Saved: Figure_Combined_Cohort_Summary")
plt.show()

# ============================================================================
# Print summary
# ============================================================================
print("\n" + "="*80)
print("ALL FIGURES SAVED SUCCESSFULLY")
print("="*80)
print(f"Location: {summary_path}/")
print("\nIndividual figures:")
print("  - Fig1_Age_Distribution (PNG + PDF)")
print("  - Fig2_Sex_Distribution (PNG + PDF)")
print("  - Fig3_CFS_Distribution (PNG + PDF)")
print("  - Fig4_ACE3_Distribution (PNG + PDF)")
print("  - Fig5_Infarct_Burden (PNG + PDF)")
print("  - Fig6_Infarct_Locations (PNG + PDF)")
print("\nCombined figure:")
print("  - Figure_Combined_Cohort_Summary (PNG + PDF)")
Created/verified folder: /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/Summary

Generating individual figures...
Saved: Fig1_Age_Distribution
No description has been provided for this image
Saved: Fig2_Sex_Distribution
No description has been provided for this image
Saved: Fig3_CFS_Distribution
No description has been provided for this image
Saved: Fig4_ACE3_Distribution
No description has been provided for this image
Saved: Fig5_Infarct_Burden
No description has been provided for this image
Saved: Fig6_Infarct_Locations
No description has been provided for this image
Generating combined panel figure...
/var/folders/zw/yj6r954924g9lyyz7v40_d2h0000gn/T/ipykernel_34702/932766018.py:260: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
  plt.tight_layout()
Saved: Figure_Combined_Cohort_Summary
No description has been provided for this image
================================================================================
ALL FIGURES SAVED SUCCESSFULLY
================================================================================
Location: /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/Summary/

Individual figures:
  - Fig1_Age_Distribution (PNG + PDF)
  - Fig2_Sex_Distribution (PNG + PDF)
  - Fig3_CFS_Distribution (PNG + PDF)
  - Fig4_ACE3_Distribution (PNG + PDF)
  - Fig5_Infarct_Burden (PNG + PDF)
  - Fig6_Infarct_Locations (PNG + PDF)

Combined figure:
  - Figure_Combined_Cohort_Summary (PNG + PDF)

Alternative panel (to be confirmed)¶

In [8]:
# Set publication-quality style
plt.rcParams.update({
    'font.size': 10,
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
    'axes.labelsize': 11,
    'axes.titlesize': 12,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 9,
    'figure.titlesize': 13,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight',
    'axes.linewidth': 1.2,
})

# Define color palette
color_primary = '#3498db'  # Blue
color_secondary = '#e74c3c'  # Red
color_male = '#5DADE2'  # Light blue
color_female = '#F1948A'  # Light red/pink
colors_locations = ['#3498db', '#e74c3c', '#2ecc71', '#f39c12']

# Create figure with 2x3 grid
fig = plt.figure(figsize=(15, 10))
gs = GridSpec(2, 3, figure=fig, hspace=0.4, wspace=0.3)

# ============================================================================
# PANEL 1: Age Distribution
# ============================================================================
ax1 = fig.add_subplot(gs[0, 0])
ages = clinical_df['Age'].values
ax1.hist(ages, bins=6, color=color_primary, alpha=0.7, edgecolor='black', linewidth=1.2)
ax1.axvline(ages.mean(), color='red', linestyle='--', linewidth=2, label=f'Mean: {ages.mean():.1f} years')
ax1.set_xlabel('Age (years)', fontweight='bold')
ax1.set_ylabel('Frequency', fontweight='bold')
ax1.set_title('Age Distribution', fontweight='bold', pad=20)
ax1.legend(frameon=False, loc='upper left')
ax1.spines['top'].set_visible(False)
ax1.spines['right'].set_visible(False)

# ============================================================================
# PANEL 2: Sex Distribution
# ============================================================================
ax2 = fig.add_subplot(gs[0, 1])
sex_counts = clinical_df['Sex'].value_counts()
n_male = sex_counts.get('M', 0)
n_female = sex_counts.get('F', 0)
total = len(clinical_df)
pct_male = (n_male / total) * 100
pct_female = (n_female / total) * 100

bars = ax2.bar(['Male', 'Female'], [n_male, n_female], 
               color=[color_male, color_female], 
               edgecolor='black', linewidth=1.2, alpha=0.8)
ax2.set_ylabel('Number of Patients', fontweight='bold')
ax2.set_title('Sex Distribution', fontweight='bold', pad=25)
ax2.spines['top'].set_visible(False)
ax2.spines['right'].set_visible(False)
ax2.set_ylim(0, max(n_male, n_female) + 2.5)  # More space at top

# Add percentage labels on bars with better positioning
for i, (bar, count, pct) in enumerate(zip(bars, [n_male, n_female], [pct_male, pct_female])):
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width()/2., height + 0.3,
             f'n={int(count)}\n({pct:.0f}%)',
             ha='center', va='bottom', fontweight='bold', fontsize=10)

# ============================================================================
# PANEL 3: Clinical Frailty Scale (CFS) - Using Box Plot Instead
# ============================================================================
ax3 = fig.add_subplot(gs[0, 2])
cfs_data = clinical_df['CFS'].values

# Create box plot
bp = ax3.boxplot([cfs_data], positions=[0], widths=0.5, patch_artist=True,
                  boxprops=dict(facecolor=color_primary, alpha=0.7, edgecolor='black', linewidth=1.5),
                  medianprops=dict(color='red', linewidth=2.5),
                  whiskerprops=dict(color='black', linewidth=1.5),
                  capprops=dict(color='black', linewidth=1.5),
                  flierprops=dict(marker='o', markerfacecolor='darkblue', markersize=8, 
                                 markeredgecolor='black', markeredgewidth=0.5))

# Add individual points with jitter
np.random.seed(42)  # For reproducibility
jitter = np.random.normal(0, 0.04, size=len(cfs_data))
ax3.scatter(jitter, cfs_data, alpha=0.6, s=60, color='darkblue', 
           edgecolors='black', linewidth=0.8, zorder=3)

median_val = np.median(cfs_data)
ax3.set_ylabel('CFS Score', fontweight='bold')
ax3.set_title('Clinical Frailty Scale', fontweight='bold', pad=20)
ax3.set_xticks([])
ax3.set_xlim(-0.5, 0.5)
ax3.spines['top'].set_visible(False)
ax3.spines['right'].set_visible(False)
ax3.spines['bottom'].set_visible(False)
# Add median label to the side
ax3.text(0.55, median_val, f'Median: {median_val:.0f}', 
         va='center', fontsize=9, fontweight='bold', color='red')

# ============================================================================
# PANEL 4: ACE-3 Score - Using Box Plot Instead
# ============================================================================
ax4 = fig.add_subplot(gs[1, 0])
ace3_data = clinical_df['ACE-3'].dropna().values

# Create box plot
bp = ax4.boxplot([ace3_data], positions=[0], widths=0.5, patch_artist=True,
                  boxprops=dict(facecolor=color_secondary, alpha=0.7, edgecolor='black', linewidth=1.5),
                  medianprops=dict(color='blue', linewidth=2.5),
                  whiskerprops=dict(color='black', linewidth=1.5),
                  capprops=dict(color='black', linewidth=1.5),
                  flierprops=dict(marker='o', markerfacecolor='darkred', markersize=8,
                                 markeredgecolor='black', markeredgewidth=0.5))

# Add individual points with jitter
np.random.seed(42)
jitter = np.random.normal(0, 0.04, size=len(ace3_data))
ax4.scatter(jitter, ace3_data, alpha=0.6, s=60, color='darkred', 
           edgecolors='black', linewidth=0.8, zorder=3)

mean_val = np.mean(ace3_data)
ax4.set_ylabel('ACE-3 Score', fontweight='bold')
ax4.set_title('Cognitive Function (ACE-3)', fontweight='bold', pad=20)
ax4.set_xticks([])
ax4.set_xlim(-0.5, 0.5)
ax4.spines['top'].set_visible(False)
ax4.spines['right'].set_visible(False)
ax4.spines['bottom'].set_visible(False)
# Add mean label to the side
ax4.text(0.55, mean_val, f'Mean: {mean_val:.1f}', 
         va='center', fontsize=9, fontweight='bold', color='blue')

# ============================================================================
# PANEL 5: New Infarct Burden Distribution
# ============================================================================
ax5 = fig.add_subplot(gs[1, 1])
infarct_counts = mri_df['POST_Number of new acute infarcts'].values
bins = [0, 1, 5, 10, 15, 20, 25]
ax5.hist(infarct_counts, bins=bins, color=color_secondary, alpha=0.7, 
         edgecolor='black', linewidth=1.2)
median_infarcts = np.median(infarct_counts)
ax5.axvline(median_infarcts, color='blue', linestyle='--', 
            linewidth=2.5, label=f'Median: {median_infarcts:.0f}')
ax5.set_xlabel('Number of New Infarcts', fontweight='bold')
ax5.set_ylabel('Frequency', fontweight='bold')
ax5.set_title('Post-TAVI Infarct Burden', fontweight='bold', pad=20)
ax5.legend(frameon=False, loc='upper right')
ax5.spines['top'].set_visible(False)
ax5.spines['right'].set_visible(False)
ax5.set_ylim(0, 3.5)  # Add more space at top

# Add statistics text box with better positioning
n_with_infarcts = np.sum(infarct_counts > 0)
pct_with_infarcts = (n_with_infarcts / len(infarct_counts)) * 100
ax5.text(0.98, 0.70, f'Patients with\nnew infarcts:\n{n_with_infarcts}/{len(infarct_counts)} ({pct_with_infarcts:.0f}%)',
         transform=ax5.transAxes, ha='right', va='top', 
         bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.6, pad=0.5), 
         fontsize=9, linespacing=1.5)

# ============================================================================
# PANEL 6: Infarct Location Distribution
# ============================================================================
ax6 = fig.add_subplot(gs[1, 2])

# Extract location data
cortical = mri_df['POST_New supratentorial infarcts - cortical (number)'].values
subcortical = mri_df['POST_New supratentorial infarcts - subcortical (number)'].values
cerebellar = mri_df['POST_New Infarcts - Cerebellum (number)'].values

# Create stacked bar chart
subjects = mri_df['Subject'].values
x_pos = np.arange(len(subjects))

p1 = ax6.bar(x_pos, cortical, color=colors_locations[0], edgecolor='black', linewidth=0.8, label='Cortical')
p2 = ax6.bar(x_pos, subcortical, bottom=cortical, color=colors_locations[1], 
             edgecolor='black', linewidth=0.8, label='Subcortical')
p3 = ax6.bar(x_pos, cerebellar, bottom=cortical+subcortical, color=colors_locations[2],
             edgecolor='black', linewidth=0.8, label='Cerebellar')

ax6.set_xlabel('Patient', fontweight='bold')
ax6.set_ylabel('Number of Infarcts', fontweight='bold')
ax6.set_title('Infarct Distribution by Location', fontweight='bold', pad=20)
ax6.set_xticks(x_pos)
ax6.set_xticklabels(subjects, rotation=45, ha='right', fontsize=9)
ax6.legend(frameon=False, loc='upper left', fontsize=9)
ax6.spines['top'].set_visible(False)
ax6.spines['right'].set_visible(False)

# ============================================================================
# Save figure
# ============================================================================
plt.tight_layout()
plt.savefig(f'{results_path}/Summary/Figure1_Cohort_Characteristics_Atlernative.png', dpi=300, bbox_inches='tight')
print(f"\nFigure saved to:")
print(f"  - {results_path}/Summary/Figure1_Cohort_Characteristics_Atlernative.png")
plt.show()

# Print summary statistics
print("\n" + "="*80)
print("SUMMARY STATISTICS FOR FIGURE 1")
print("="*80)
print(f"Age: {ages.mean():.1f} ± {ages.std():.1f} years (range: {ages.min()}-{ages.max()})")
print(f"Sex: {n_male}M ({pct_male:.0f}%) / {n_female}F ({pct_female:.0f}%)")
print(f"CFS: Median={np.median(cfs_data):.0f}, IQR=[{np.percentile(cfs_data, 25):.0f}-{np.percentile(cfs_data, 75):.0f}]")
print(f"ACE-3: {mean_val:.1f} ± {np.std(ace3_data):.1f} (range: {ace3_data.min():.0f}-{ace3_data.max():.0f})")
print(f"New infarcts: Median={median_infarcts:.0f}, Range=[{infarct_counts.min()}-{infarct_counts.max()}]")
print(f"Patients with new infarcts: {n_with_infarcts}/{len(infarct_counts)} ({pct_with_infarcts:.0f}%)")
print(f"Total cortical infarcts: {cortical.sum():.0f}")
print(f"Total subcortical infarcts: {subcortical.sum():.0f}")
print(f"Total cerebellar infarcts: {cerebellar.sum():.0f}")
/var/folders/zw/yj6r954924g9lyyz7v40_d2h0000gn/T/ipykernel_34702/3848405712.py:195: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
  plt.tight_layout()
Figure saved to:
  - /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/Summary/Figure1_Cohort_Characteristics_Atlernative.png
No description has been provided for this image
================================================================================
SUMMARY STATISTICS FOR FIGURE 1
================================================================================
Age: 80.6 ± 3.5 years (range: 76-88)
Sex: 6M (60%) / 4F (40%)
CFS: Median=4, IQR=[4-5]
ACE-3: 88.9 ± 11.5 (range: 65-99)
New infarcts: Median=6, Range=[0-24]
Patients with new infarcts: 9/10 (90%)
Total cortical infarcts: 52
Total subcortical infarcts: 7
Total cerebellar infarcts: 11

Step 3: Pre VS Post counts¶

In [15]:
import matplotlib.pyplot as plt
import numpy as np
import os

# Create folder
mri_plots_path = f"{results_path}/MRI_Analysis"
os.makedirs(mri_plots_path, exist_ok=True)

# Set style
plt.rcParams.update({
    'font.size': 12,
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
    'axes.labelsize': 13,
    'axes.titlesize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'legend.fontsize': 12,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight',
    'axes.linewidth': 1.2,
})

# Better, more distinct color palette
fazekas_colors = {
    1: '#E63946',  # Red
    2: '#457B9D',  # Blue  
    3: '#2A9D8F'   # Teal
}

# Extract data
pre_infarcts = mri_df['PRE_Infarcts - Total Number'].values
post_infarcts = mri_df['POST_Number of new acute infarcts'].values
fazekas_scores = mri_df['PRE_SVD Fazekas Score (1,2,3)'].values
subjects = mri_df['Subject'].values

# ============================================================================
# Plot 1: Original style slope plot with better colors
# ============================================================================
fig1, ax1 = plt.subplots(figsize=(10, 7))

# Plot lines for each patient, colored by Fazekas score
for i in range(len(mri_df)):
    fazekas = fazekas_scores[i]
    color = fazekas_colors[fazekas]
    
    # Plot line connecting pre to post
    ax1.plot([0, 1], [pre_infarcts[i], post_infarcts[i]], 
            color=color, linewidth=2.5, alpha=0.8)
    
    # Add markers
    ax1.scatter([0, 1], [pre_infarcts[i], post_infarcts[i]], 
              color=color, s=80, edgecolors='black', linewidth=1, zorder=3)

# Set x-axis
ax1.set_xlim(-0.2, 1.2)
ax1.set_xticks([0, 1])
ax1.set_xticklabels(['Pre', 'Post'], fontsize=14, fontweight='bold')

# Set y-axis
ax1.set_ylabel('Number of Infarcts', fontweight='bold', fontsize=14)
ax1.spines['top'].set_visible(False)
ax1.spines['right'].set_visible(False)
ax1.grid(False)

# Create better legend with colored patches
from matplotlib.patches import Patch
legend_elements = [
    Patch(facecolor=fazekas_colors[1], edgecolor='black', label='Fazekas Score 1'),
    Patch(facecolor=fazekas_colors[2], edgecolor='black', label='Fazekas Score 2'),
    Patch(facecolor=fazekas_colors[3], edgecolor='black', label='Fazekas Score 3')
]
ax1.legend(handles=legend_elements, loc='upper left', frameon=True, 
          fancybox=True, shadow=True, fontsize=12)

# Set title
ax1.set_title('Infarct Burden: Pre vs Post-TAVI by Fazekas Score', 
            fontweight='bold', pad=20, fontsize=15)

plt.tight_layout()
plt.savefig(f'{mri_plots_path}/Pre_vs_Post_Infarcts_by_Fazekas.png', dpi=300, bbox_inches='tight')
print("Saved: Pre_vs_Post_Infarcts_by_Fazekas")
plt.show()

# ============================================================================
# Plot 2: Change in Infarcts (Bar Chart)
# ============================================================================
fig2, ax2 = plt.subplots(figsize=(12, 7))

# Calculate change
change_infarcts = post_infarcts - pre_infarcts

## Sort by Fazekas score, then by change
#subjects_sorted = subjects[sort_idx]
#sort_idx = np.lexsort((change_infarcts, fazekas_scores))
#fazekas_sorted = fazekas_scores[sort_idx]
#change_sorted = change_infarcts[sort_idx]

# Keep original patient order (ST01, ST03, ST04, etc.)
subjects_sorted = subjects
change_sorted = change_infarcts
fazekas_sorted = fazekas_scores

# Create bar positions
x_pos = np.arange(len(subjects_sorted))

# Color bars by Fazekas score
colors_list = [fazekas_colors[f] for f in fazekas_sorted]

# Plot bars
bars = ax2.bar(x_pos, change_sorted, color=colors_list, 
              edgecolor='black', linewidth=1.2, alpha=0.8)

# Add zero line
ax2.axhline(0, color='black', linewidth=1.5, linestyle='-', zorder=1)

# Styling
ax2.set_xlabel('Patient', fontweight='bold', fontsize=14)
ax2.set_ylabel('Change in Infarct Count\n(Post - Pre)', fontweight='bold', fontsize=14)
ax2.set_title('New Infarct Burden per Patient by Fazekas Score', 
            fontweight='bold', pad=20, fontsize=15)
ax2.set_xticks(x_pos)
ax2.set_xticklabels(subjects_sorted, rotation=45, ha='right', fontsize=11)
ax2.spines['top'].set_visible(False)
ax2.spines['right'].set_visible(False)
ax2.grid(axis='y', alpha=0.3, linestyle='--')

# Create legend
legend_elements = [
    Patch(facecolor=fazekas_colors[1], edgecolor='black', label='Fazekas Score 1'),
    Patch(facecolor=fazekas_colors[2], edgecolor='black', label='Fazekas Score 2'),
    Patch(facecolor=fazekas_colors[3], edgecolor='black', label='Fazekas Score 3')
]
ax2.legend(handles=legend_elements, loc='upper left', frameon=True, 
          fancybox=True, shadow=True, fontsize=11)

plt.tight_layout()
plt.savefig(f'{mri_plots_path}/Change_in_Infarcts_by_Fazekas.png', dpi=300, bbox_inches='tight')
print("Saved: Change_in_Infarcts_by_Fazekas")
plt.show()

# ============================================================================
# Plot 3: Grouped Box Plot
# ============================================================================
fig3, ax3 = plt.subplots(figsize=(10, 7))

# Prepare data for grouped boxplot
data_for_box = []
positions_list = []
colors_list_box = []
labels_list = []

pos = 0
for fazekas in [1, 2, 3]:
    mask = fazekas_scores == fazekas
    if mask.sum() > 0:
        # Pre data
        data_for_box.append(pre_infarcts[mask])
        positions_list.append(pos)
        colors_list_box.append(fazekas_colors[fazekas])
        labels_list.append(f'F{fazekas}\nPre')
        
        # Post data
        data_for_box.append(post_infarcts[mask])
        positions_list.append(pos + 0.8)
        colors_list_box.append(fazekas_colors[fazekas])
        labels_list.append(f'F{fazekas}\nPost')
        
        pos += 2

# Create box plots
bp = ax3.boxplot(data_for_box, positions=positions_list, widths=0.6,
                patch_artist=True, showfliers=True,
                boxprops=dict(linewidth=1.5),
                medianprops=dict(color='black', linewidth=2),
                whiskerprops=dict(linewidth=1.5),
                capprops=dict(linewidth=1.5))

# Color the boxes
for patch, color in zip(bp['boxes'], colors_list_box):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)

# Styling
ax3.set_xticks(positions_list)
ax3.set_xticklabels(labels_list, fontsize=11)
ax3.set_ylabel('Number of Infarcts', fontweight='bold', fontsize=14)
ax3.set_title('Infarct Distribution: Pre vs Post-TAVI by Fazekas Score', 
            fontweight='bold', pad=20, fontsize=15)
ax3.spines['top'].set_visible(False)
ax3.spines['right'].set_visible(False)
ax3.grid(axis='y', alpha=0.3, linestyle='--')

# Add legend
legend_elements = [
    Patch(facecolor=fazekas_colors[1], edgecolor='black', alpha=0.7, label='Fazekas Score 1'),
    Patch(facecolor=fazekas_colors[2], edgecolor='black', alpha=0.7, label='Fazekas Score 2'),
    Patch(facecolor=fazekas_colors[3], edgecolor='black', alpha=0.7, label='Fazekas Score 3')
]
ax3.legend(handles=legend_elements, loc='upper left', frameon=True, 
          fancybox=True, shadow=True, fontsize=11)

plt.tight_layout()
plt.savefig(f'{mri_plots_path}/Boxplot_Pre_vs_Post_by_Fazekas.png', dpi=300, bbox_inches='tight')
print("Saved: Boxplot_Pre_vs_Post_by_Fazekas")
plt.show()

# ============================================================================
# Plot 4: Improved Radial Plot - Cleaner Version
# ============================================================================
fig4, ax4 = plt.subplots(figsize=(11, 11), subplot_kw=dict(projection='polar'))

# Calculate angles for each patient
n_patients = len(subjects)
theta = np.linspace(0, 2 * np.pi, n_patients, endpoint=False)

# Get change in infarcts
change_infarcts = post_infarcts - pre_infarcts

# Create bars
bars = ax4.bar(theta, change_infarcts, width=2*np.pi/n_patients*0.85, 
               bottom=0, alpha=0.85, edgecolor='black', linewidth=2)

# Color by Fazekas score
for bar, fazekas in zip(bars, fazekas_scores):
    bar.set_facecolor(fazekas_colors[fazekas])

# Add patient labels around the circle - larger and bolder
for angle, subject, change in zip(theta, subjects, change_infarcts):
    rotation = np.rad2deg(angle)
    # Adjust text rotation for readability
    if 90 < rotation < 270:
        rotation = rotation + 180
        ha = 'right'
    else:
        ha = 'left'
    
    # Position labels outside the plot
    ax4.text(angle, max(change_infarcts) * 1.2, subject, 
            rotation=rotation, ha=ha, va='center', 
            fontweight='bold', fontsize=12)

# CLEAN UP - Remove degree markers and grid
ax4.set_theta_zero_location('N')
ax4.set_theta_direction(-1)
ax4.set_ylim(0, max(change_infarcts) * 1.35)
ax4.set_xticks([])  # Remove angular ticks
ax4.set_xticklabels([])  # Remove angular labels
ax4.grid(True, alpha=0.3)  # Keep only radial grid, make it subtle

# Clean radial axis labels
ax4.set_ylabel('New Infarcts', fontweight='bold', fontsize=13, labelpad=40)

# Title
ax4.set_title('New Infarct Burden per Patient\nColored by Fazekas Score', 
             fontweight='bold', pad=35, fontsize=16)

# Add legend
legend_elements = [
    Patch(facecolor=fazekas_colors[1], edgecolor='black', label='Fazekas Score 1'),
    Patch(facecolor=fazekas_colors[2], edgecolor='black', label='Fazekas Score 2'),
    Patch(facecolor=fazekas_colors[3], edgecolor='black', label='Fazekas Score 3')
]
ax4.legend(handles=legend_elements, loc='upper right', bbox_to_anchor=(1.25, 1.1),
          frameon=True, fancybox=True, shadow=True, fontsize=12)

plt.tight_layout()
plt.savefig(f'{mri_plots_path}/Radial_Infarct_Change_by_Fazekas.png', dpi=300, bbox_inches='tight')
print("Saved: Radial_Infarct_Change_by_Fazekas (improved)")
plt.show()

# ============================================================================
# Print summary
print("\n" + "="*80)
print("THREE PLOTS CREATED")
print("="*80)
print("1. Pre vs Post slope plot (original style with better colors)")
print("2. Bar chart showing change in infarct count per patient")
print("3. Grouped boxplot comparing pre vs post by Fazekas score")
print(f"\nAll saved in: {mri_plots_path}/")

# Summary statistics
print("\n" + "="*80)
print("SUMMARY: PRE vs POST INFARCTS BY FAZEKAS SCORE")
print("="*80)
for fazekas in [1, 2, 3]:
    mask = fazekas_scores == fazekas
    n_patients = mask.sum()
    if n_patients > 0:
        pre_mean = pre_infarcts[mask].mean()
        post_mean = post_infarcts[mask].mean()
        print(f"\nFazekas Score {fazekas} (n={n_patients}):")
        print(f"  Pre-TAVI infarcts:  {pre_mean:.1f} ± {pre_infarcts[mask].std():.1f}")
        print(f"  Post-TAVI infarcts: {post_mean:.1f} ± {post_infarcts[mask].std():.1f}")
        print(f"  Mean change: {post_mean - pre_mean:.1f}")
Saved: Pre_vs_Post_Infarcts_by_Fazekas
No description has been provided for this image
Saved: Change_in_Infarcts_by_Fazekas
No description has been provided for this image
Saved: Boxplot_Pre_vs_Post_by_Fazekas
No description has been provided for this image
Saved: Radial_Infarct_Change_by_Fazekas (improved)
No description has been provided for this image
================================================================================
THREE PLOTS CREATED
================================================================================
1. Pre vs Post slope plot (original style with better colors)
2. Bar chart showing change in infarct count per patient
3. Grouped boxplot comparing pre vs post by Fazekas score

All saved in: /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/MRI_Analysis/

================================================================================
SUMMARY: PRE vs POST INFARCTS BY FAZEKAS SCORE
================================================================================

Fazekas Score 1 (n=5):
  Pre-TAVI infarcts:  0.0 ± 0.0
  Post-TAVI infarcts: 6.4 ± 3.7
  Mean change: 6.4

Fazekas Score 2 (n=4):
  Pre-TAVI infarcts:  1.5 ± 1.5
  Post-TAVI infarcts: 10.5 ± 8.7
  Mean change: 9.0

Fazekas Score 3 (n=1):
  Pre-TAVI infarcts:  5.0 ± 0.0
  Post-TAVI infarcts: 0.0 ± 0.0
  Mean change: -5.0

Step 4: Spearman Correlation - Clinical¶

In [24]:
import pandas as pd
import numpy as np
from scipy.stats import spearmanr
import matplotlib.pyplot as plt
import seaborn as sns

# Create correlation analysis folder
correlation_path = f"{results_path}/Clinical_Correlations"
os.makedirs(correlation_path, exist_ok=True)
print(f"Created/verified folder: {correlation_path}\n")

# Define predictor variables (PRE)
predictors = {
    'Fazekas Score': mri_df['PRE_SVD Fazekas Score (1,2,3)'],
    'Pre-TAVI Infarcts': mri_df['PRE_Infarcts - Total Number']
}

# Define outcome variables (POST) - with shorter names for display
outcomes = {
    'Total New Infarcts': mri_df['POST_Number of new acute infarcts'],
    'Cortical': mri_df['POST_New supratentorial infarcts - cortical (number)'],
    'Subcortical': mri_df['POST_New supratentorial infarcts - subcortical (number)'],
    'Right Hemisphere': mri_df['POST_New infarcts - Right cerebral hemisphere (number)'],
    'Left Hemisphere': mri_df['POST_New infarcts - Left cerebral hemisphere (number)'],
    'Cerebellum': mri_df['POST_New Infarcts - Cerebellum (number)'],
    'Microhaemorrhage': mri_df['POST_New microhaemorrhage (number)']
}

# Calculate total number of tests for Bonferroni correction
n_tests = len(predictors) * len(outcomes)
bonf_alpha = 0.05 / n_tests

print("="*80)
print("SPEARMAN CORRELATION ANALYSIS")
print("="*80)
print(f"Total number of tests: {n_tests}")
print(f"Bonferroni-corrected alpha: {bonf_alpha:.4f}")
print("="*80 + "\n")

# Create matrices for rho values and p-values
rho_matrix = np.zeros((len(outcomes), len(predictors)))
p_matrix = np.zeros((len(outcomes), len(predictors)))

# Calculate correlations
for i, (out_name, out_values) in enumerate(outcomes.items()):
    for j, (pred_name, pred_values) in enumerate(predictors.items()):
        rho, p_value = spearmanr(pred_values, out_values)
        rho_matrix[i, j] = rho
        p_matrix[i, j] = p_value

# Create DataFrames
rho_df = pd.DataFrame(rho_matrix, 
                      index=list(outcomes.keys()), 
                      columns=list(predictors.keys()))
p_df = pd.DataFrame(p_matrix, 
                    index=list(outcomes.keys()), 
                    columns=list(predictors.keys()))

# ============================================================================
# CREATE FORMATTED TABLE
# ============================================================================
formatted_table = []
for outcome in outcomes.keys():
    row = {'Post-TAVI Outcome': outcome}
    for predictor in predictors.keys():
        rho = rho_df.loc[outcome, predictor]
        p = p_df.loc[outcome, predictor]
        
        # Add significance markers
        if p < bonf_alpha:
            sig = '***'  # Bonferroni significant
        elif p < 0.05:
            sig = '*'    # Raw significant
        else:
            sig = ''
        
        # Format: rho (p-value) sig
        row[predictor] = f"{rho:.3f} (p={p:.3f}){sig}"
    
    formatted_table.append(row)

formatted_df = pd.DataFrame(formatted_table)

print("="*80)
print("CORRELATION TABLE")
print("* = p<0.05 (raw), *** = Bonferroni-corrected")
print("="*80)
print(formatted_df.to_string(index=False))
print("\n")

# Save formatted table
formatted_df.to_csv(f'{correlation_path}/Formatted_Correlation_Table.csv', index=False)

# ============================================================================
# CREATE DETAILED RESULTS TABLE
# ============================================================================
detailed_results = []
for outcome in outcomes.keys():
    for predictor in predictors.keys():
        rho = rho_df.loc[outcome, predictor]
        p = p_df.loc[outcome, predictor]
        
        detailed_results.append({
            'Pre-TAVI Predictor': predictor,
            'Post-TAVI Outcome': outcome,
            'Spearman ρ': f"{rho:.3f}",
            'p-value': f"{p:.4f}",
            'Sig (α=0.05)': '✓' if p < 0.05 else '',
            'Sig (Bonferroni)': '✓' if p < bonf_alpha else ''
        })

detailed_df = pd.DataFrame(detailed_results)
detailed_df = detailed_df.sort_values('p-value')

print("="*80)
print("DETAILED RESULTS (sorted by p-value)")
print("="*80)
print(detailed_df.to_string(index=False))
print("\n")

# Save detailed table
detailed_df.to_csv(f'{correlation_path}/Detailed_Correlation_Results.csv', index=False)

# ============================================================================
# CREATE HEATMAP
# ============================================================================
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))

# Heatmap 1: Correlation coefficients
sns.heatmap(rho_df, annot=True, fmt='.3f', cmap='RdBu_r', center=0, 
            vmin=-1, vmax=1, cbar_kws={'label': 'Spearman ρ'},
            linewidths=0.5, linecolor='black', ax=ax1,
            annot_kws={'fontsize': 11, 'fontweight': 'bold'})
ax1.set_title('Spearman Correlation Coefficients', fontweight='bold', pad=20, fontsize=14)
ax1.set_xlabel('Pre-TAVI Predictors', fontweight='bold', fontsize=12)
ax1.set_ylabel('Post-TAVI Outcomes', fontweight='bold', fontsize=12)
ax1.tick_params(axis='both', labelsize=11)

# Heatmap 2: P-values with significance marking
# Create annotations with significance markers
annot_array = np.empty_like(p_matrix, dtype=object)
for i in range(p_matrix.shape[0]):
    for j in range(p_matrix.shape[1]):
        p_val = p_matrix[i, j]
        if p_val < bonf_alpha:
            annot_array[i, j] = f'{p_val:.3f}***'
        elif p_val < 0.05:
            annot_array[i, j] = f'{p_val:.3f}*'
        else:
            annot_array[i, j] = f'{p_val:.3f}'

# Use reversed colormap (white = low p-value = significant)
sns.heatmap(p_df, annot=annot_array, fmt='', cmap='YlOrRd_r', 
            vmin=0, vmax=0.05, cbar_kws={'label': 'p-value'},
            linewidths=0.5, linecolor='black', ax=ax2,
            annot_kws={'fontsize': 10})
ax2.set_title('P-values (* p<0.05, *** Bonferroni)', fontweight='bold', pad=20, fontsize=14)
ax2.set_xlabel('Pre-TAVI Predictors', fontweight='bold', fontsize=12)
ax2.set_ylabel('Post-TAVI Outcomes', fontweight='bold', fontsize=12)
ax2.tick_params(axis='both', labelsize=11)

plt.tight_layout()
plt.savefig(f'{correlation_path}/Correlation_Heatmaps.png', dpi=300, bbox_inches='tight')
print("="*80)
print("HEATMAP SAVED")
print("="*80)
plt.show()

# ============================================================================
# CREATE SINGLE COMBINED HEATMAP (ALTERNATIVE)
# ============================================================================
fig, ax = plt.subplots(figsize=(10, 10))

# Create custom annotations: rho value + significance stars
annot_combined = np.empty_like(rho_matrix, dtype=object)
for i in range(rho_matrix.shape[0]):
    for j in range(rho_matrix.shape[1]):
        rho = rho_matrix[i, j]
        p = p_matrix[i, j]
        
        if p < bonf_alpha:
            sig = '***'
        elif p < 0.05:
            sig = '*'
        else:
            sig = ''
        
        annot_combined[i, j] = f'{rho:.2f}{sig}'

sns.heatmap(rho_df, annot=annot_combined, fmt='', cmap='RdBu_r', center=0, 
            vmin=-1, vmax=1, cbar_kws={'label': 'Spearman ρ', 'shrink': 0.8},
            linewidths=1, linecolor='black', ax=ax,
            annot_kws={'fontsize': 12, 'fontweight': 'bold'},
            square=True)

ax.set_title('Pre-TAVI Predictors vs Post-TAVI Outcomes\nSpearman Correlations (* p<0.05, *** Bonferroni)', 
            fontweight='bold', pad=20, fontsize=15)
ax.set_xlabel('Pre-TAVI Predictors', fontweight='bold', fontsize=13)
ax.set_ylabel('Post-TAVI Outcomes', fontweight='bold', fontsize=13)
ax.tick_params(axis='both', labelsize=12)

# Rotate x-axis labels for better readability
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

plt.tight_layout()
plt.savefig(f'{correlation_path}/Correlation_Combined_Heatmap.png', dpi=300, bbox_inches='tight')
print(f"Combined heatmap saved to: {correlation_path}/Correlation_Combined_Heatmap.png\n")
plt.show()

# ============================================================================
# SUMMARY
# ============================================================================
print("="*80)
print("SUMMARY OF SIGNIFICANT CORRELATIONS")
print("="*80)

sig_bonf = detailed_df[detailed_df['Sig (Bonferroni)'] == '✓']
sig_raw = detailed_df[detailed_df['Sig (α=0.05)'] == '✓']

print(f"\nBonferroni-corrected (α={bonf_alpha:.4f}):")
if len(sig_bonf) > 0:
    print(sig_bonf[['Pre-TAVI Predictor', 'Post-TAVI Outcome', 'Spearman ρ', 'p-value']].to_string(index=False))
else:
    print("  No significant correlations after Bonferroni correction")

print(f"\nRaw significance (α=0.05):")
if len(sig_raw) > 0:
    print(sig_raw[['Pre-TAVI Predictor', 'Post-TAVI Outcome', 'Spearman ρ', 'p-value']].to_string(index=False))
else:
    print("  No significant correlations at α=0.05")

print("\n" + "="*80)
print("FILES SAVED")
print("="*80)
print(f"Location: {correlation_path}/")
print("\nFiles created:")
print("  - Formatted_Correlation_Table.csv")
print("  - Detailed_Correlation_Results.csv")
print("  - Correlation_Heatmaps.png (side-by-side: rho and p-values)")
print("  - Correlation_Combined_Heatmap.png (single heatmap with significance)")
Created/verified folder: /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/Clinical_Correlations

================================================================================
SPEARMAN CORRELATION ANALYSIS
================================================================================
Total number of tests: 14
Bonferroni-corrected alpha: 0.0036
================================================================================

================================================================================
CORRELATION TABLE
* = p<0.05 (raw), *** = Bonferroni-corrected
================================================================================
 Post-TAVI Outcome    Fazekas Score Pre-TAVI Infarcts
Total New Infarcts -0.151 (p=0.677)   0.103 (p=0.777)
          Cortical  0.003 (p=0.993)   0.167 (p=0.645)
       Subcortical -0.222 (p=0.538)   0.026 (p=0.943)
  Right Hemisphere -0.260 (p=0.468)  -0.004 (p=0.992)
   Left Hemisphere -0.074 (p=0.839)   0.148 (p=0.683)
        Cerebellum  0.060 (p=0.868)   0.274 (p=0.443)
  Microhaemorrhage  0.257 (p=0.474)   0.459 (p=0.182)


================================================================================
DETAILED RESULTS (sorted by p-value)
================================================================================
Pre-TAVI Predictor  Post-TAVI Outcome Spearman ρ p-value Sig (α=0.05) Sig (Bonferroni)
 Pre-TAVI Infarcts   Microhaemorrhage      0.459  0.1817                              
 Pre-TAVI Infarcts         Cerebellum      0.274  0.4434                              
     Fazekas Score   Right Hemisphere     -0.260  0.4680                              
     Fazekas Score   Microhaemorrhage      0.257  0.4742                              
     Fazekas Score        Subcortical     -0.222  0.5379                              
 Pre-TAVI Infarcts           Cortical      0.167  0.6446                              
     Fazekas Score Total New Infarcts     -0.151  0.6767                              
 Pre-TAVI Infarcts    Left Hemisphere      0.148  0.6827                              
 Pre-TAVI Infarcts Total New Infarcts      0.103  0.7768                              
     Fazekas Score    Left Hemisphere     -0.074  0.8387                              
     Fazekas Score         Cerebellum      0.060  0.8684                              
 Pre-TAVI Infarcts        Subcortical      0.026  0.9431                              
 Pre-TAVI Infarcts   Right Hemisphere     -0.004  0.9922                              
     Fazekas Score           Cortical      0.003  0.9926                              


================================================================================
HEATMAP SAVED
================================================================================
No description has been provided for this image
Combined heatmap saved to: /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/Clinical_Correlations/Correlation_Combined_Heatmap.png

No description has been provided for this image
================================================================================
SUMMARY OF SIGNIFICANT CORRELATIONS
================================================================================

Bonferroni-corrected (α=0.0036):
  No significant correlations after Bonferroni correction

Raw significance (α=0.05):
  No significant correlations at α=0.05

================================================================================
FILES SAVED
================================================================================
Location: /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/Clinical_Correlations/

Files created:
  - Formatted_Correlation_Table.csv
  - Detailed_Correlation_Results.csv
  - Correlation_Heatmaps.png (side-by-side: rho and p-values)
  - Correlation_Combined_Heatmap.png (single heatmap with significance)

Step 5: Spearman Correlation - Cytokines¶

In [23]:
import pandas as pd
import numpy as np
from scipy.stats import spearmanr
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from scipy.stats import ConstantInputWarning
import os

# Suppress the constant input warning
warnings.filterwarnings('ignore', category=ConstantInputWarning)

# Create correlation analysis folder
cytokine_correlation_path = f"{results_path}/Cytokine_Correlations"
os.makedirs(cytokine_correlation_path, exist_ok=True)
print(f"Created/verified folder: {cytokine_correlation_path}\n")

# ============================================================================
# PREPARE CYTOKINE DATA
# ============================================================================

# List of cytokines to analyze
cytokines = ['IL-1β', 'IL-2', 'IL-4', 'IL-6', 'IL-8', 'IL-10', 'IL-12p70', 
             'IL-13', 'TNF-α', 'IFN-γ', 'GFAP', 'NF-L', 'S100B', 'Tau']

# Reshape cytokine data to wide format (one row per patient)
cytokine_wide = {}
for subject in cytokines_df['Subject'].unique():
    subject_data = cytokines_df[cytokines_df['Subject'] == subject].sort_values('Time point')
    cytokine_wide[subject] = {}
    
    for cytokine in cytokines:
        values = subject_data[cytokine].values
        # Individual timepoints
        cytokine_wide[subject][f'{cytokine}_T1'] = values[0] if len(values) >= 1 else np.nan
        cytokine_wide[subject][f'{cytokine}_T2'] = values[1] if len(values) >= 2 else np.nan
        cytokine_wide[subject][f'{cytokine}_T3'] = values[2] if len(values) >= 3 else np.nan
        cytokine_wide[subject][f'{cytokine}_T4'] = values[3] if len(values) >= 4 else np.nan
        
        # Calculate changes (delta values)
        if len(values) >= 2:
            cytokine_wide[subject][f'{cytokine}_T1-T2'] = values[1] - values[0]
        if len(values) >= 3:
            cytokine_wide[subject][f'{cytokine}_T1-T3'] = values[2] - values[0]
            cytokine_wide[subject][f'{cytokine}_T2-T3'] = values[2] - values[1]
        if len(values) >= 4:
            cytokine_wide[subject][f'{cytokine}_T1-T4'] = values[3] - values[0]
            cytokine_wide[subject][f'{cytokine}_T2-T4'] = values[3] - values[1]
            cytokine_wide[subject][f'{cytokine}_T3-T4'] = values[3] - values[2]

# Convert to DataFrame
cytokine_wide_df = pd.DataFrame(cytokine_wide).T
cytokine_wide_df.index.name = 'Subject'
cytokine_wide_df = cytokine_wide_df.reset_index()

# Merge with MRI data
merged_df = mri_df.merge(cytokine_wide_df, on='Subject')

print("="*80)
print("DATA PREPARATION COMPLETE")
print("="*80)
print(f"Number of subjects: {len(merged_df)}")
print(f"Number of cytokine variables: {len([c for c in merged_df.columns if any(cyt in c for cyt in cytokines)])}")
print("="*80 + "\n")

# ============================================================================
# DEFINE PREDICTORS AND OUTCOMES
# ============================================================================

# Cytokine predictors (all timepoints and changes)
cytokine_predictors = {}
for col in merged_df.columns:
    if any(cyt in col for cyt in cytokines) and (col.endswith(('_T1', '_T2', '_T3', '_T4')) or 
                                                   'T1-T2' in col or 'T1-T3' in col or 'T1-T4' in col or 
                                                   'T2-T3' in col or 'T2-T4' in col or 'T3-T4' in col):
        cytokine_predictors[col] = merged_df[col]

# MRI outcomes
mri_outcomes = {
    'Fazekas Score': merged_df['PRE_SVD Fazekas Score (1,2,3)'],
    'Pre-TAVI Infarcts': merged_df['PRE_Infarcts - Total Number'],
    'Total New Infarcts': merged_df['POST_Number of new acute infarcts'],
    'Cortical': merged_df['POST_New supratentorial infarcts - cortical (number)'],
    'Subcortical': merged_df['POST_New supratentorial infarcts - subcortical (number)'],
    'Right Hemisphere': merged_df['POST_New infarcts - Right cerebral hemisphere (number)'],
    'Left Hemisphere': merged_df['POST_New infarcts - Left cerebral hemisphere (number)'],
    'Cerebellum': merged_df['POST_New Infarcts - Cerebellum (number)'],
    'Microhaemorrhage': merged_df['POST_New microhaemorrhage (number)']
}

print("="*80)
print("CYTOKINE-MRI CORRELATION ANALYSIS")
print("="*80)
print("Using STRATIFIED Bonferroni correction:")
print("  - Each comparison group (T1, T2, T3, T4, changes) corrected separately")
print("  - Tests per group: 14 cytokines × 9 MRI outcomes = 126 tests")
print("  - Bonferroni alpha per group: 0.05/126 = 0.000397")
print("="*80 + "\n")

# ============================================================================
# CALCULATE CORRELATIONS WITH STRATIFIED BONFERRONI
# ============================================================================

results_list = []

# Loop through each comparison group separately
for group_name in ['T1', 'T2', 'T3', 'T4', 'T1-T2', 'T1-T3', 'T1-T4', 'T2-T3', 'T2-T4', 'T3-T4']:
    # Get cytokines for this group
    if group_name in ['T1', 'T2', 'T3', 'T4']:
        group_cytokines = {k: v for k, v in cytokine_predictors.items() if k.endswith(f'_{group_name}')}
    else:
        group_cytokines = {k: v for k, v in cytokine_predictors.items() if group_name in k}
    
    # Calculate Bonferroni for this group only
    n_tests_group = len(group_cytokines) * len(mri_outcomes)
    bonf_alpha_group = 0.05 / n_tests_group if n_tests_group > 0 else 0.05
    
    print(f"Processing {group_name}: {len(group_cytokines)} cytokines × {len(mri_outcomes)} outcomes = {n_tests_group} tests, α={bonf_alpha_group:.6f}")
    
    # Calculate correlations for this group
    for pred_name, pred_values in group_cytokines.items():
        for out_name, out_values in mri_outcomes.items():
            # Check for NaN values
            valid_mask = ~(pred_values.isna() | out_values.isna())
            
            if valid_mask.sum() >= 3:  # Need at least 3 valid pairs
                # Check for constant variables to avoid warnings
                if pred_values[valid_mask].std() == 0 or out_values[valid_mask].std() == 0:
                    results_list.append({
                        'Cytokine Predictor': pred_name,
                        'MRI Outcome': out_name,
                        'Spearman ρ': np.nan,
                        'p-value': np.nan,
                        'n': valid_mask.sum(),
                        'n_tests_in_group': n_tests_group,
                        'Bonferroni_alpha_group': bonf_alpha_group,
                        'Sig (α=0.05)': '',
                        'Sig (Bonferroni)': ''
                    })
                else:
                    rho, p_value = spearmanr(pred_values[valid_mask], out_values[valid_mask])
                    
                    results_list.append({
                        'Cytokine Predictor': pred_name,
                        'MRI Outcome': out_name,
                        'Spearman ρ': rho,
                        'p-value': p_value,
                        'n': valid_mask.sum(),
                        'n_tests_in_group': n_tests_group,
                        'Bonferroni_alpha_group': bonf_alpha_group,
                        'Sig (α=0.05)': '✓' if p_value < 0.05 else '',
                        'Sig (Bonferroni)': '✓' if p_value < bonf_alpha_group else ''
                    })

# Create results DataFrame
results_df = pd.DataFrame(results_list)
results_df = results_df.sort_values('p-value')

# ============================================================================
# CREATE ENHANCED SUMMARY TABLE WITH SIGNIFICANCE MARKERS
# ============================================================================

print("\n" + "="*80)
print("ENHANCED SUMMARY TABLE - TOP 50 CORRELATIONS")
print("="*80)

# Create enhanced display with significance markers
enhanced_results = results_df.copy()

# Add significance marker column
def get_sig_marker(row):
    if pd.isna(row['p-value']):
        return 'N/A'
    elif row['Sig (Bonferroni)'] == '✓':
        return '***'
    elif row['Sig (α=0.05)'] == '✓':
        return '*'
    else:
        return 'ns'

enhanced_results['Significance'] = enhanced_results.apply(get_sig_marker, axis=1)

# Create formatted display version
display_results = enhanced_results.copy()
display_results['Spearman ρ'] = display_results['Spearman ρ'].apply(
    lambda x: f"{x:.3f}" if not pd.isna(x) else "N/A")
display_results['p-value'] = display_results['p-value'].apply(
    lambda x: f"{x:.4f}" if not pd.isna(x) else "N/A")

# Show top 50
print(display_results[['Cytokine Predictor', 'MRI Outcome', 'Spearman ρ', 
                       'p-value', 'Significance', 'n']].head(50).to_string(index=False))

# ============================================================================
# CREATE COMPREHENSIVE TSV FILE FOR EASY EXPLORATION
# ============================================================================

# Add a "Comparison" column that matches heatmap titles
def get_comparison_group(cytokine_name):
    """Extract the timepoint/change group from cytokine name"""
    if cytokine_name.endswith('_T1'):
        return 'T1'
    elif cytokine_name.endswith('_T2'):
        return 'T2'
    elif cytokine_name.endswith('_T3'):
        return 'T3'
    elif cytokine_name.endswith('_T4'):
        return 'T4'
    elif 'T1-T2' in cytokine_name:
        return 'T1-T2 (Change)'
    elif 'T1-T3' in cytokine_name:
        return 'T1-T3 (Change)'
    elif 'T1-T4' in cytokine_name:
        return 'T1-T4 (Change)'
    elif 'T2-T3' in cytokine_name:
        return 'T2-T3 (Change)'
    elif 'T2-T4' in cytokine_name:
        return 'T2-T4 (Change)'
    elif 'T3-T4' in cytokine_name:
        return 'T3-T4 (Change)'
    else:
        return 'Unknown'

# Extract just the cytokine name (without timepoint suffix)
def get_cytokine_name_only(full_name):
    """Extract cytokine name without timepoint"""
    for suffix in ['_T1', '_T2', '_T3', '_T4', '_T1-T2', '_T1-T3', '_T1-T4', 
                   '_T2-T3', '_T2-T4', '_T3-T4']:
        if full_name.endswith(suffix):
            return full_name.replace(suffix, '')
    return full_name

# Create comprehensive table
comprehensive_table = enhanced_results.copy()
comprehensive_table['Comparison_Group'] = comprehensive_table['Cytokine Predictor'].apply(get_comparison_group)
comprehensive_table['Cytokine'] = comprehensive_table['Cytokine Predictor'].apply(get_cytokine_name_only)

# Calculate Bonferroni-corrected p-value (adjusted within group)
comprehensive_table['p-value (Bonferroni)'] = comprehensive_table.apply(
    lambda row: row['p-value'] * row['n_tests_in_group'] if not pd.isna(row['p-value']) else np.nan,
    axis=1
)

# Reorder columns for easier exploration
comprehensive_table = comprehensive_table[[
    'Comparison_Group',
    'Cytokine',
    'Cytokine Predictor',
    'MRI Outcome',
    'Spearman ρ',
    'p-value',
    'p-value (Bonferroni)',
    'n_tests_in_group',
    'Bonferroni_alpha_group',
    'Significance',
    'Sig (α=0.05)',
    'Sig (Bonferroni)',
    'n'
]]

# Sort by Comparison Group, then by p-value
comprehensive_table = comprehensive_table.sort_values(['Comparison_Group', 'p-value'])

# Save as TSV
comprehensive_table.to_csv(f'{cytokine_correlation_path}/Comprehensive_Correlations.tsv', 
                          sep='\t', index=False)

print("\n" + "="*80)
print("COMPREHENSIVE TSV FILE CREATED")
print("="*80)
print(f"File: {cytokine_correlation_path}/Comprehensive_Correlations.tsv")
print("\nThis file contains:")
print("  - Comparison_Group: Matches heatmap titles (T1, T2, T3, T4, or Change periods)")
print("  - Cytokine: Just the cytokine name")
print("  - Cytokine Predictor: Full variable name with timepoint")
print("  - MRI Outcome: The outcome being correlated")
print("  - Spearman ρ: Correlation coefficient")
print("  - p-value: Raw p-value")
print("  - p-value (Bonferroni): Bonferroni-corrected p-value (within group)")
print("  - n_tests_in_group: Number of tests in this comparison group")
print("  - Bonferroni_alpha_group: Significance threshold for this group")
print("  - Significance: *** (Bonferroni), * (p<0.05), ns (not significant), or N/A (constant)")
print("  - Sig (α=0.05): ✓ or empty")
print("  - Sig (Bonferroni): ✓ or empty")
print("  - n: Sample size for that correlation")
print("\nYou can easily filter by Comparison_Group to see all correlations for each heatmap!")

# Save full results
display_results.to_csv(f'{cytokine_correlation_path}/All_Cytokine_MRI_Correlations.csv', index=False)

# ============================================================================
# SIGNIFICANT CORRELATIONS SUMMARY
# ============================================================================

sig_bonf = results_df[results_df['Sig (Bonferroni)'] == '✓']
sig_raw = results_df[results_df['Sig (α=0.05)'] == '✓']

print("\n" + "="*80)
print(f"BONFERRONI-CORRECTED SIGNIFICANT CORRELATIONS (stratified by group)")
print("="*80)
if len(sig_bonf) > 0:
    print(f"Found {len(sig_bonf)} significant correlations after Bonferroni correction")
    sig_bonf_display = sig_bonf.copy()
    sig_bonf_display['Spearman ρ'] = sig_bonf_display['Spearman ρ'].apply(lambda x: f"{x:.3f}")
    sig_bonf_display['p-value'] = sig_bonf_display['p-value'].apply(lambda x: f"{x:.6f}")
    print(sig_bonf_display[['Cytokine Predictor', 'MRI Outcome', 'Spearman ρ', 
                             'p-value', 'Bonferroni_alpha_group']].to_string(index=False))
    sig_bonf_display.to_csv(f'{cytokine_correlation_path}/Significant_Bonferroni_Correlations.csv', index=False)
else:
    print(f"No significant correlations after stratified Bonferroni correction")

print("\n" + "="*80)
print(f"RAW SIGNIFICANT CORRELATIONS (α=0.05)")
print("="*80)
if len(sig_raw) > 0:
    print(f"Found {len(sig_raw)} significant correlations at α=0.05")
    sig_raw_display = sig_raw.copy()
    sig_raw_display['Spearman ρ'] = sig_raw_display['Spearman ρ'].apply(lambda x: f"{x:.3f}")
    sig_raw_display['p-value'] = sig_raw_display['p-value'].apply(lambda x: f"{x:.4f}")
    print(sig_raw_display.head(30)[['Cytokine Predictor', 'MRI Outcome', 'Spearman ρ', 'p-value']].to_string(index=False))
    sig_raw_display.to_csv(f'{cytokine_correlation_path}/Significant_Raw_Correlations.csv', index=False)
else:
    print("No significant correlations at α=0.05")

# ============================================================================
# CREATE SUMMARY BY COMPARISON GROUP
# ============================================================================

print("\n" + "="*80)
print("SUMMARY BY COMPARISON GROUP")
print("="*80)

for group in ['T1', 'T2', 'T3', 'T4', 'T1-T2 (Change)', 'T1-T3 (Change)', 
              'T1-T4 (Change)', 'T2-T3 (Change)', 'T2-T4 (Change)', 'T3-T4 (Change)']:
    group_data = comprehensive_table[comprehensive_table['Comparison_Group'] == group]
    n_total = len(group_data)
    n_sig_bonf = (group_data['Sig (Bonferroni)'] == '✓').sum()
    n_sig_raw = (group_data['Sig (α=0.05)'] == '✓').sum()
    
    if n_total > 0:
        bonf_alpha = group_data['Bonferroni_alpha_group'].iloc[0]
        print(f"\n{group} (Bonferroni α={bonf_alpha:.6f}):")
        print(f"  Total correlations: {n_total}")
        print(f"  Significant (Bonferroni): {n_sig_bonf}")
        print(f"  Significant (α=0.05): {n_sig_raw}")
        print(f"  Non-significant: {n_total - n_sig_raw}")
        
        if n_sig_raw > 0:
            print(f"  Top 3 correlations:")
            top3 = group_data[group_data['Sig (α=0.05)'] == '✓'].head(3)
            for _, row in top3.iterrows():
                rho_val = row['Spearman ρ']
                p_val = row['p-value']
                if not pd.isna(rho_val) and not pd.isna(p_val):
                    print(f"    - {row['Cytokine']} vs {row['MRI Outcome']}: ρ={rho_val:.3f}, p={p_val:.4f} {row['Significance']}")

# ============================================================================
# CREATE HEATMAPS FOR EACH TIMEPOINT/CHANGE
# ============================================================================

print("\n" + "="*80)
print("GENERATING HEATMAPS")
print("="*80)

# Group cytokines by timepoint/change type
timepoint_groups = {
    'T1': [c for c in cytokine_predictors.keys() if c.endswith('_T1')],
    'T2': [c for c in cytokine_predictors.keys() if c.endswith('_T2')],
    'T3': [c for c in cytokine_predictors.keys() if c.endswith('_T3')],
    'T4': [c for c in cytokine_predictors.keys() if c.endswith('_T4')],
    'T1-T2': [c for c in cytokine_predictors.keys() if 'T1-T2' in c],
    'T1-T3': [c for c in cytokine_predictors.keys() if 'T1-T3' in c],
    'T1-T4': [c for c in cytokine_predictors.keys() if 'T1-T4' in c],
    'T2-T3': [c for c in cytokine_predictors.keys() if 'T2-T3' in c],
    'T2-T4': [c for c in cytokine_predictors.keys() if 'T2-T4' in c],
    'T3-T4': [c for c in cytokine_predictors.keys() if 'T3-T4' in c]
}

# Create heatmap for each timepoint group
for group_name, cytokine_cols in timepoint_groups.items():
    if len(cytokine_cols) == 0:
        continue
    
    # Get Bonferroni alpha for this group
    n_tests_group = len(cytokine_cols) * len(mri_outcomes)
    bonf_alpha_group = 0.05 / n_tests_group
    
    # Create matrices
    rho_matrix = np.zeros((len(cytokine_cols), len(mri_outcomes)))
    p_matrix = np.zeros((len(cytokine_cols), len(mri_outcomes)))
    
    for i, cyt_col in enumerate(cytokine_cols):
        for j, (out_name, out_values) in enumerate(mri_outcomes.items()):
            pred_values = cytokine_predictors[cyt_col]
            valid_mask = ~(pred_values.isna() | out_values.isna())
            
            if valid_mask.sum() >= 3:
                # Check for constant variables
                if pred_values[valid_mask].std() == 0 or out_values[valid_mask].std() == 0:
                    rho_matrix[i, j] = np.nan
                    p_matrix[i, j] = np.nan
                else:
                    rho, p = spearmanr(pred_values[valid_mask], out_values[valid_mask])
                    rho_matrix[i, j] = rho
                    p_matrix[i, j] = p
            else:
                rho_matrix[i, j] = np.nan
                p_matrix[i, j] = np.nan
    
    # Create row labels (just cytokine names, remove timepoint suffix)
    row_labels = [col.replace(f'_{group_name}', '').replace(f'_{group_name}', '') for col in cytokine_cols]
    
    # Create heatmap
    fig, ax = plt.subplots(figsize=(12, 10))
    
    # Create annotations with significance markers (using group-specific alpha)
    annot_array = np.empty_like(rho_matrix, dtype=object)
    for i in range(rho_matrix.shape[0]):
        for j in range(rho_matrix.shape[1]):
            if np.isnan(rho_matrix[i, j]):
                annot_array[i, j] = ''
            else:
                rho = rho_matrix[i, j]
                p = p_matrix[i, j]
                
                if p < bonf_alpha_group:
                    sig = '***'
                elif p < 0.05:
                    sig = '*'
                else:
                    sig = ''
                
                annot_array[i, j] = f'{rho:.2f}{sig}'
    
    sns.heatmap(rho_matrix, annot=annot_array, fmt='', cmap='RdBu_r', center=0, 
                vmin=-1, vmax=1, cbar_kws={'label': 'Spearman ρ'},
                linewidths=0.5, linecolor='gray', ax=ax,
                annot_kws={'fontsize': 9},
                xticklabels=list(mri_outcomes.keys()),
                yticklabels=row_labels)
    
    # Determine if timepoint or change
    if '-' in group_name:
        title_text = f'Cytokine Changes ({group_name}) vs MRI Outcomes'
    else:
        title_text = f'Cytokines at {group_name} vs MRI Outcomes'
    
    ax.set_title(f'{title_text}\n(* p<0.05, *** Bonferroni α={bonf_alpha_group:.6f})', 
                fontweight='bold', pad=20, fontsize=14)
    ax.set_xlabel('MRI Outcomes', fontweight='bold', fontsize=12)
    ax.set_ylabel('Cytokines', fontweight='bold', fontsize=12)
    
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    plt.setp(ax.get_yticklabels(), rotation=0)
    
    plt.tight_layout()
    plt.savefig(f'{cytokine_correlation_path}/Heatmap_{group_name}.png', dpi=300, bbox_inches='tight')
    print(f"Saved heatmap: {group_name}")
    plt.show()

print("\n" + "="*80)
print("ALL FILES SAVED")
print("="*80)
print(f"Location: {cytokine_correlation_path}/")
print("\nFiles created:")
print("  - Comprehensive_Correlations.tsv (main file for exploration)")
print("  - All_Cytokine_MRI_Correlations.csv (complete results)")
print("  - Significant_Bonferroni_Correlations.csv (if any)")
print("  - Significant_Raw_Correlations.csv (if any)")
print("  - Heatmap_T1.png through Heatmap_T3-T4.png (10 heatmaps)")
print("="*80)
Created/verified folder: /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/Cytokine_Correlations

================================================================================
DATA PREPARATION COMPLETE
================================================================================
Number of subjects: 10
Number of cytokine variables: 140
================================================================================

================================================================================
CYTOKINE-MRI CORRELATION ANALYSIS
================================================================================
Using STRATIFIED Bonferroni correction:
  - Each comparison group (T1, T2, T3, T4, changes) corrected separately
  - Tests per group: 14 cytokines × 9 MRI outcomes = 126 tests
  - Bonferroni alpha per group: 0.05/126 = 0.000397
================================================================================

Processing T1: 14 cytokines × 9 outcomes = 126 tests, α=0.000397
Processing T2: 14 cytokines × 9 outcomes = 126 tests, α=0.000397
Processing T3: 14 cytokines × 9 outcomes = 126 tests, α=0.000397
Processing T4: 14 cytokines × 9 outcomes = 126 tests, α=0.000397
Processing T1-T2: 14 cytokines × 9 outcomes = 126 tests, α=0.000397
Processing T1-T3: 14 cytokines × 9 outcomes = 126 tests, α=0.000397
Processing T1-T4: 14 cytokines × 9 outcomes = 126 tests, α=0.000397
Processing T2-T3: 14 cytokines × 9 outcomes = 126 tests, α=0.000397
Processing T2-T4: 14 cytokines × 9 outcomes = 126 tests, α=0.000397
Processing T3-T4: 14 cytokines × 9 outcomes = 126 tests, α=0.000397

================================================================================
ENHANCED SUMMARY TABLE - TOP 50 CORRELATIONS
================================================================================
Cytokine Predictor        MRI Outcome Spearman ρ p-value Significance  n
       IL-1β_T1-T4    Left Hemisphere     -0.927  0.0001          *** 10
            Tau_T4           Cortical      0.849  0.0019            * 10
    IL-12p70_T1-T2      Fazekas Score      0.840  0.0023            * 10
       IL-1β_T1-T4 Total New Infarcts     -0.839  0.0024            * 10
           GFAP_T4      Fazekas Score      0.838  0.0025            * 10
            Tau_T4   Right Hemisphere      0.809  0.0046            * 10
           GFAP_T1      Fazekas Score      0.804  0.0051            * 10
         Tau_T1-T4           Cortical      0.800  0.0054            * 10
        GFAP_T1-T3         Cerebellum      0.799  0.0056            * 10
            Tau_T4 Total New Infarcts      0.790  0.0065            * 10
       TNF-α_T3-T4      Fazekas Score     -0.777  0.0082            * 10
        IL-6_T1-T3  Pre-TAVI Infarcts      0.775  0.0085            * 10
           GFAP_T3      Fazekas Score      0.771  0.0091            * 10
       IFN-γ_T3-T4      Fazekas Score     -0.771  0.0091            * 10
           IL-8_T3      Fazekas Score      0.771  0.0091            * 10
           GFAP_T4  Pre-TAVI Infarcts      0.761  0.0106            * 10
        IL-8_T3-T4      Fazekas Score     -0.750  0.0124            * 10
        NF-L_T1-T3    Left Hemisphere      0.750  0.0125            * 10
         Tau_T1-T4   Right Hemisphere      0.740  0.0144            * 10
    IL-12p70_T1-T2  Pre-TAVI Infarcts      0.739  0.0146            * 10
       IFN-γ_T1-T4  Pre-TAVI Infarcts     -0.720  0.0189            * 10
         Tau_T1-T4    Left Hemisphere      0.720  0.0190            * 10
        IL-2_T3-T4      Fazekas Score     -0.717  0.0196            * 10
            Tau_T4    Left Hemisphere      0.707  0.0221            * 10
         Tau_T1-T4 Total New Infarcts      0.705  0.0227            * 10
            Tau_T3         Cerebellum      0.703  0.0233            * 10
         Tau_T3-T4           Cortical      0.702  0.0237            * 10
        NF-L_T1-T4         Cerebellum      0.696  0.0253            * 10
         Tau_T3-T4   Right Hemisphere      0.696  0.0253            * 10
       S100B_T1-T2           Cortical      0.695  0.0255            * 10
           GFAP_T1  Pre-TAVI Infarcts      0.692  0.0265            * 10
           NF-L_T3  Pre-TAVI Infarcts      0.692  0.0265            * 10
       TNF-α_T2-T3         Cerebellum     -0.690  0.0273            * 10
        IL-8_T1-T4      Fazekas Score     -0.683  0.0294            * 10
           IL-8_T2      Fazekas Score      0.677  0.0316            * 10
        NF-L_T1-T3 Total New Infarcts      0.675  0.0323            * 10
       IFN-γ_T3-T4  Pre-TAVI Infarcts     -0.672  0.0334            * 10
           NF-L_T1  Pre-TAVI Infarcts      0.672  0.0334            * 10
         Tau_T1-T3    Left Hemisphere      0.665  0.0360            * 10
            Tau_T3    Left Hemisphere      0.665  0.0360            * 10
        NF-L_T1-T4 Total New Infarcts      0.663  0.0368            * 10
       S100B_T1-T4   Right Hemisphere      0.659  0.0383            * 10
       S100B_T1-T3           Cortical      0.659  0.0384            * 10
        IL-8_T3-T4  Pre-TAVI Infarcts     -0.658  0.0386            * 10
        IL-6_T1-T3      Fazekas Score      0.657  0.0392            * 10
       IL-1β_T1-T4           Cortical     -0.652  0.0409            * 10
       IL-12p70_T3         Cerebellum     -0.652  0.0412            * 10
       IL-12p70_T2         Cerebellum     -0.652  0.0412            * 10
        GFAP_T1-T4  Pre-TAVI Infarcts      0.651  0.0414            * 10
           IL-8_T1      Fazekas Score      0.650  0.0419            * 10

================================================================================
COMPREHENSIVE TSV FILE CREATED
================================================================================
File: /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/Cytokine_Correlations/Comprehensive_Correlations.tsv

This file contains:
  - Comparison_Group: Matches heatmap titles (T1, T2, T3, T4, or Change periods)
  - Cytokine: Just the cytokine name
  - Cytokine Predictor: Full variable name with timepoint
  - MRI Outcome: The outcome being correlated
  - Spearman ρ: Correlation coefficient
  - p-value: Raw p-value
  - p-value (Bonferroni): Bonferroni-corrected p-value (within group)
  - n_tests_in_group: Number of tests in this comparison group
  - Bonferroni_alpha_group: Significance threshold for this group
  - Significance: *** (Bonferroni), * (p<0.05), ns (not significant), or N/A (constant)
  - Sig (α=0.05): ✓ or empty
  - Sig (Bonferroni): ✓ or empty
  - n: Sample size for that correlation

You can easily filter by Comparison_Group to see all correlations for each heatmap!

================================================================================
BONFERRONI-CORRECTED SIGNIFICANT CORRELATIONS (stratified by group)
================================================================================
Found 1 significant correlations after Bonferroni correction
Cytokine Predictor     MRI Outcome Spearman ρ  p-value  Bonferroni_alpha_group
       IL-1β_T1-T4 Left Hemisphere     -0.927 0.000115                0.000397

================================================================================
RAW SIGNIFICANT CORRELATIONS (α=0.05)
================================================================================
Found 54 significant correlations at α=0.05
Cytokine Predictor        MRI Outcome Spearman ρ p-value
       IL-1β_T1-T4    Left Hemisphere     -0.927  0.0001
            Tau_T4           Cortical      0.849  0.0019
    IL-12p70_T1-T2      Fazekas Score      0.840  0.0023
       IL-1β_T1-T4 Total New Infarcts     -0.839  0.0024
           GFAP_T4      Fazekas Score      0.838  0.0025
            Tau_T4   Right Hemisphere      0.809  0.0046
           GFAP_T1      Fazekas Score      0.804  0.0051
         Tau_T1-T4           Cortical      0.800  0.0054
        GFAP_T1-T3         Cerebellum      0.799  0.0056
            Tau_T4 Total New Infarcts      0.790  0.0065
       TNF-α_T3-T4      Fazekas Score     -0.777  0.0082
        IL-6_T1-T3  Pre-TAVI Infarcts      0.775  0.0085
           GFAP_T3      Fazekas Score      0.771  0.0091
       IFN-γ_T3-T4      Fazekas Score     -0.771  0.0091
           IL-8_T3      Fazekas Score      0.771  0.0091
           GFAP_T4  Pre-TAVI Infarcts      0.761  0.0106
        IL-8_T3-T4      Fazekas Score     -0.750  0.0124
        NF-L_T1-T3    Left Hemisphere      0.750  0.0125
         Tau_T1-T4   Right Hemisphere      0.740  0.0144
    IL-12p70_T1-T2  Pre-TAVI Infarcts      0.739  0.0146
       IFN-γ_T1-T4  Pre-TAVI Infarcts     -0.720  0.0189
         Tau_T1-T4    Left Hemisphere      0.720  0.0190
        IL-2_T3-T4      Fazekas Score     -0.717  0.0196
            Tau_T4    Left Hemisphere      0.707  0.0221
         Tau_T1-T4 Total New Infarcts      0.705  0.0227
            Tau_T3         Cerebellum      0.703  0.0233
         Tau_T3-T4           Cortical      0.702  0.0237
        NF-L_T1-T4         Cerebellum      0.696  0.0253
         Tau_T3-T4   Right Hemisphere      0.696  0.0253
       S100B_T1-T2           Cortical      0.695  0.0255

================================================================================
SUMMARY BY COMPARISON GROUP
================================================================================

T1 (Bonferroni α=0.000397):
  Total correlations: 126
  Significant (Bonferroni): 0
  Significant (α=0.05): 4
  Non-significant: 122
  Top 3 correlations:
    - GFAP vs Fazekas Score: ρ=0.804, p=0.0051 *
    - GFAP vs Pre-TAVI Infarcts: ρ=0.692, p=0.0265 *
    - NF-L vs Pre-TAVI Infarcts: ρ=0.672, p=0.0334 *

T2 (Bonferroni α=0.000397):
  Total correlations: 126
  Significant (Bonferroni): 0
  Significant (α=0.05): 2
  Non-significant: 124
  Top 3 correlations:
    - IL-8 vs Fazekas Score: ρ=0.677, p=0.0316 *
    - IL-12p70 vs Cerebellum: ρ=-0.652, p=0.0412 *

T3 (Bonferroni α=0.000397):
  Total correlations: 126
  Significant (Bonferroni): 0
  Significant (α=0.05): 7
  Non-significant: 119
  Top 3 correlations:
    - GFAP vs Fazekas Score: ρ=0.771, p=0.0091 *
    - IL-8 vs Fazekas Score: ρ=0.771, p=0.0091 *
    - Tau vs Cerebellum: ρ=0.703, p=0.0233 *

T4 (Bonferroni α=0.000397):
  Total correlations: 126
  Significant (Bonferroni): 0
  Significant (α=0.05): 6
  Non-significant: 120
  Top 3 correlations:
    - Tau vs Cortical: ρ=0.849, p=0.0019 *
    - GFAP vs Fazekas Score: ρ=0.838, p=0.0025 *
    - Tau vs Right Hemisphere: ρ=0.809, p=0.0046 *

T1-T2 (Change) (Bonferroni α=0.000397):
  Total correlations: 126
  Significant (Bonferroni): 0
  Significant (α=0.05): 3
  Non-significant: 123
  Top 3 correlations:
    - IL-12p70 vs Fazekas Score: ρ=0.840, p=0.0023 *
    - IL-12p70 vs Pre-TAVI Infarcts: ρ=0.739, p=0.0146 *
    - S100B vs Cortical: ρ=0.695, p=0.0255 *

T1-T3 (Change) (Bonferroni α=0.000397):
  Total correlations: 126
  Significant (Bonferroni): 0
  Significant (α=0.05): 8
  Non-significant: 118
  Top 3 correlations:
    - GFAP vs Cerebellum: ρ=0.799, p=0.0056 *
    - IL-6 vs Pre-TAVI Infarcts: ρ=0.775, p=0.0085 *
    - NF-L vs Left Hemisphere: ρ=0.750, p=0.0125 *

T1-T4 (Change) (Bonferroni α=0.000397):
  Total correlations: 126
  Significant (Bonferroni): 1
  Significant (α=0.05): 13
  Non-significant: 113
  Top 3 correlations:
    - IL-1β vs Left Hemisphere: ρ=-0.927, p=0.0001 ***
    - IL-1β vs Total New Infarcts: ρ=-0.839, p=0.0024 *
    - Tau vs Cortical: ρ=0.800, p=0.0054 *

T2-T3 (Change) (Bonferroni α=0.000397):
  Total correlations: 126
  Significant (Bonferroni): 0
  Significant (α=0.05): 1
  Non-significant: 125
  Top 3 correlations:
    - TNF-α vs Cerebellum: ρ=-0.690, p=0.0273 *

T2-T4 (Change) (Bonferroni α=0.000397):
  Total correlations: 126
  Significant (Bonferroni): 0
  Significant (α=0.05): 2
  Non-significant: 124
  Top 3 correlations:
    - Tau vs Cortical: ρ=0.640, p=0.0462 *
    - Tau vs Left Hemisphere: ρ=0.634, p=0.0489 *

T3-T4 (Change) (Bonferroni α=0.000397):
  Total correlations: 126
  Significant (Bonferroni): 0
  Significant (α=0.05): 8
  Non-significant: 118
  Top 3 correlations:
    - TNF-α vs Fazekas Score: ρ=-0.777, p=0.0082 *
    - IFN-γ vs Fazekas Score: ρ=-0.771, p=0.0091 *
    - IL-8 vs Fazekas Score: ρ=-0.750, p=0.0124 *

================================================================================
GENERATING HEATMAPS
================================================================================
Saved heatmap: T1
No description has been provided for this image
Saved heatmap: T2
No description has been provided for this image
Saved heatmap: T3
No description has been provided for this image
Saved heatmap: T4
No description has been provided for this image
Saved heatmap: T1-T2
No description has been provided for this image
Saved heatmap: T1-T3
No description has been provided for this image
Saved heatmap: T1-T4
No description has been provided for this image
Saved heatmap: T2-T3
No description has been provided for this image
Saved heatmap: T2-T4
No description has been provided for this image
Saved heatmap: T3-T4
No description has been provided for this image
================================================================================
ALL FILES SAVED
================================================================================
Location: /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/Cytokine_Correlations/

Files created:
  - Comprehensive_Correlations.tsv (main file for exploration)
  - All_Cytokine_MRI_Correlations.csv (complete results)
  - Significant_Bonferroni_Correlations.csv (if any)
  - Significant_Raw_Correlations.csv (if any)
  - Heatmap_T1.png through Heatmap_T3-T4.png (10 heatmaps)
================================================================================

Check IL-13 values being constant¶

In [20]:
# Check IL-13 values across all timepoints
print("="*80)
print("IL-13 VALUES ACROSS TIMEPOINTS")
print("="*80)

# Extract IL-13 data for all patients and timepoints
il13_data = cytokines_df[['Subject', 'Time point', 'IL-13']].copy()
il13_data = il13_data.sort_values(['Subject', 'Time point'])

print("\nAll IL-13 values:")
print(il13_data.to_string(index=False))

# Summary statistics by timepoint
print("\n" + "="*80)
print("IL-13 SUMMARY BY TIMEPOINT")
print("="*80)
for tp in [1, 2, 3, 4]:
    tp_data = il13_data[il13_data['Time point'] == tp]['IL-13']
    print(f"\nTimepoint {tp}:")
    print(f"  n = {len(tp_data)}")
    print(f"  Mean = {tp_data.mean():.4f}")
    print(f"  Std = {tp_data.std():.4f}")
    print(f"  Min = {tp_data.min():.4f}")
    print(f"  Max = {tp_data.max():.4f}")
    print(f"  Unique values: {tp_data.nunique()}")
    print(f"  All values: {sorted(tp_data.unique())}")

# Check if T3 is constant
print("\n" + "="*80)
print("CONSTANT VALUE CHECK")
print("="*80)
for tp in [1, 2, 3, 4]:
    tp_data = il13_data[il13_data['Time point'] == tp]['IL-13']
    if tp_data.std() == 0:
        print(f"⚠️  T{tp}: CONSTANT - all values = {tp_data.iloc[0]:.4f}")
    else:
        print(f"✓  T{tp}: Variable (std = {tp_data.std():.4f})")

# Also check other cytokines for constant values
print("\n" + "="*80)
print("CHECKING ALL CYTOKINES FOR CONSTANT VALUES AT EACH TIMEPOINT")
print("="*80)

for cytokine in cytokines:
    constant_timepoints = []
    for tp in [1, 2, 3, 4]:
        tp_data = cytokines_df[cytokines_df['Time point'] == tp][cytokine]
        if tp_data.std() == 0:
            constant_timepoints.append(f"T{tp}")
    
    if constant_timepoints:
        print(f"⚠️  {cytokine}: Constant at {', '.join(constant_timepoints)}")

print("\n" + "="*80)
================================================================================
IL-13 VALUES ACROSS TIMEPOINTS
================================================================================

All IL-13 values:
Subject  Time point    IL-13
   ST01           1 0.265284
   ST01           2 0.265284
   ST01           3 0.265284
   ST01           4 0.265284
   ST03           1 0.265284
   ST03           2 0.265284
   ST03           3 0.265284
   ST03           4 0.619000
   ST04           1 0.265284
   ST04           2 0.265284
   ST04           3 0.265284
   ST04           4 0.265284
   ST05           1 0.265284
   ST05           2 0.265284
   ST05           3 0.265284
   ST05           4 0.265284
   ST14           1 0.265284
   ST14           2 0.265284
   ST14           3 0.265284
   ST14           4 0.265284
   ST20           1 0.265284
   ST20           2 0.265284
   ST20           3 0.265284
   ST20           4 0.265284
   ST21           1 0.548000
   ST21           2 0.589000
   ST21           3 0.265284
   ST21           4 0.265284
   ST22           1 0.265284
   ST22           2 0.265284
   ST22           3 0.265284
   ST22           4 0.265284
   ST23           1 0.265284
   ST23           2 0.673000
   ST23           3 0.265284
   ST23           4 0.265284
   ST24           1 0.265284
   ST24           2 0.265284
   ST24           3 0.265284
   ST24           4 0.711000

================================================================================
IL-13 SUMMARY BY TIMEPOINT
================================================================================

Timepoint 1:
  n = 10
  Mean = 0.2936
  Std = 0.0894
  Min = 0.2653
  Max = 0.5480
  Unique values: 2
  All values: [np.float64(0.265283587), np.float64(0.548)]

Timepoint 2:
  n = 10
  Mean = 0.3384
  Std = 0.1555
  Min = 0.2653
  Max = 0.6730
  Unique values: 3
  All values: [np.float64(0.265283587), np.float64(0.589), np.float64(0.673)]

Timepoint 3:
  n = 10
  Mean = 0.2653
  Std = 0.0000
  Min = 0.2653
  Max = 0.2653
  Unique values: 1
  All values: [np.float64(0.265283587)]

Timepoint 4:
  n = 10
  Mean = 0.3452
  Std = 0.1699
  Min = 0.2653
  Max = 0.7110
  Unique values: 3
  All values: [np.float64(0.265283587), np.float64(0.619), np.float64(0.711)]

================================================================================
CONSTANT VALUE CHECK
================================================================================
✓  T1: Variable (std = 0.0894)
✓  T2: Variable (std = 0.1555)
⚠️  T3: CONSTANT - all values = 0.2653
✓  T4: Variable (std = 0.1699)

================================================================================
CHECKING ALL CYTOKINES FOR CONSTANT VALUES AT EACH TIMEPOINT
================================================================================
⚠️  IL-13: Constant at T3

================================================================================

Statistical Approach Discussion: Multiple Testing Correction¶

Background¶

We are correlating:

  • 14 cytokines (IL-1β, IL-2, IL-4, IL-6, IL-8, IL-10, IL-12p70, IL-13, TNF-α, IFN-γ, GFAP, NF-L, S100B, Tau)
  • At 10 different time conditions (T1, T2, T3, T4, plus 6 delta/change periods: T1-T2, T1-T3, T1-T4, T2-T3, T2-T4, T3-T4)
  • With 9 MRI outcomes (Fazekas score, pre-TAVI infarcts, total new infarcts, cortical, subcortical, right hemisphere, left hemisphere, cerebellum, microhaemorrhage)

Total possible tests: 14 cytokines × 10 time conditions × 9 MRI outcomes = 1,260 correlations


Question: How should we correct for multiple testing?¶

We have two main options:

Option 1: Stratified Bonferroni Correction ✅ (Currently implemented)¶

Approach:

  • Apply Bonferroni correction separately within each comparison group
  • Each group (e.g., "T1 vs MRI outcomes") has 14 cytokines × 9 outcomes = 126 tests
  • Bonferroni-corrected α per group = 0.05 / 126 = 0.000397
  • Total: 10 separate corrections (one per heatmap)

Rationale:

  • Each timepoint/change period represents a distinct biological question:
    • T1 = baseline inflammatory state
    • T2 = acute procedural response
    • T3 = early post-procedure
    • T4 = delayed response
    • Changes = inflammatory dynamics
  • These are conceptually independent research questions, not one massive screen
  • Standard approach in longitudinal biomarker studies and repeated-measures analyses
  • Each heatmap is a separate analysis with its own biological interpretation

Precedent:

  • Common in multi-timepoint studies (e.g., clinical trials with multiple timepoints)
  • Standard in multi-cohort analyses (each cohort corrected separately)
  • Accepted in metabolomics/proteomics when grouping by pathways or functional categories

Statistical justification:

  • Family-Wise Error Rate (FWER) control within each independent biological question
  • Avoids over-penalizing for testing the same cytokines across biologically distinct conditions
  • Balances Type I and Type II error rates appropriately for exploratory N=10 study

Option 2: Global Bonferroni Correction¶

Approach:

  • Apply Bonferroni correction across all 1,260 tests simultaneously
  • Bonferroni-corrected α = 0.05 / 1260 = 0.0000397 (p < 0.00004)

Rationale:

  • Most conservative approach
  • Controls FWER across the entire analysis
  • Appropriate if this is truly one massive hypothesis-free screen

Problems in our context:

  • Extremely stringent: With N=10, requires near-perfect correlations (ρ > 0.95) to be significant
  • Low power: Likely to result in zero discoveries despite true associations
  • Not biologically motivated: Treats all 1,260 tests as equally related, ignoring natural groupings
  • Over-correction: Penalizes for testing IL-6 at T1, T2, T3, T4 as if these are independent tests, when they're clearly related (same biomarker over time)

Recommendation¶

For this exploratory study with N=10:

Primary approach: Stratified Bonferroni (as implemented)¶

  • Scientifically justified by distinct biological timepoints
  • Balances discovery and rigor
  • Standard in longitudinal biomarker research
  • Each comparison group (heatmap) is conceptually independent

Step 6: Testing a correlation matrix plot¶

In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import spearmanr

# Create a folder for correlation plots
corrplot_path = f"{results_path}/Correlation_Plots"
os.makedirs(corrplot_path, exist_ok=True)

# ============================================================================
# PREPARE DATA FOR T1 CYTOKINES VS MRI OUTCOMES
# ============================================================================

# Select T1 cytokines
t1_cytokines = ['IL-1β_T1', 'IL-2_T1', 'IL-4_T1', 'IL-6_T1', 'IL-8_T1', 
                'IL-10_T1', 'IL-12p70_T1', 'IL-13_T1', 'TNF-α_T1', 'IFN-γ_T1', 
                'GFAP_T1', 'NF-L_T1', 'S100B_T1', 'Tau_T1']

# Select MRI outcomes (use shorter names for display)
mri_outcome_cols = {
    'Fazekas': 'PRE_SVD Fazekas Score (1,2,3)',
    'Pre-Inf': 'PRE_Infarcts - Total Number',
    'New-Inf': 'POST_Number of new acute infarcts',
    'Cortical': 'POST_New supratentorial infarcts - cortical (number)',
    'Subcort': 'POST_New supratentorial infarcts - subcortical (number)',
    'Right': 'POST_New infarcts - Right cerebral hemisphere (number)',
    'Left': 'POST_New infarcts - Left cerebral hemisphere (number)',
    'Cerebel': 'POST_New Infarcts - Cerebellum (number)',
    'Microhem': 'POST_New microhaemorrhage (number)'
}

# Create a combined dataframe with T1 cytokines and MRI outcomes
plot_data = merged_df[['Subject']].copy()

# Add T1 cytokines (remove _T1 suffix for cleaner labels)
for cyt in t1_cytokines:
    if cyt in merged_df.columns:
        clean_name = cyt.replace('_T1', '')
        plot_data[clean_name] = merged_df[cyt]

# Add MRI outcomes
for short_name, full_name in mri_outcome_cols.items():
    if full_name in merged_df.columns:
        plot_data[short_name] = merged_df[full_name]

# Remove Subject column for plotting
plot_data_numeric = plot_data.drop('Subject', axis=1)

print("="*80)
print("CREATING CORRELATION MATRIX PLOT FOR T1 CYTOKINES VS MRI OUTCOMES")
print("="*80)
print(f"Variables: {plot_data_numeric.shape[1]}")
print(f"Samples: {plot_data_numeric.shape[0]}")
print("="*80 + "\n")

# ============================================================================
# CREATE PAIRS PLOT WITH CORRELATION VALUES
# ============================================================================

# Get list of cytokines and MRI outcomes
cytokine_names = [col for col in plot_data_numeric.columns if col not in mri_outcome_cols.keys()]
mri_names = list(mri_outcome_cols.keys())

n_cytokines = len(cytokine_names)
n_mri = len(mri_names)

# Create figure
fig, axes = plt.subplots(n_mri, n_cytokines, figsize=(28, 20))

# Plot each combination
for i, mri_var in enumerate(mri_names):
    for j, cyt_var in enumerate(cytokine_names):
        ax = axes[i, j]
        
        # Get data
        x = plot_data_numeric[cyt_var].values
        y = plot_data_numeric[mri_var].values
        
        # Remove NaN values
        valid_mask = ~(np.isnan(x) | np.isnan(y))
        x_valid = x[valid_mask]
        y_valid = y[valid_mask]
        
        if len(x_valid) >= 3 and x_valid.std() > 0 and y_valid.std() > 0:
            # Scatter plot
            ax.scatter(x_valid, y_valid, alpha=0.6, s=50, color='#2E86AB', edgecolors='black', linewidth=0.5)
            
            # Add regression line
            z = np.polyfit(x_valid, y_valid, 1)
            p = np.poly1d(z)
            x_line = np.linspace(x_valid.min(), x_valid.max(), 100)
            ax.plot(x_line, p(x_line), "r-", linewidth=2, alpha=0.8)
            
            # Calculate Spearman correlation
            rho, p_val = spearmanr(x_valid, y_valid)
            
            # Add text with correlation
            if p_val < 0.001:
                p_text = 'p<0.001'
            else:
                p_text = f'p={p_val:.3f}'
            
            ax.text(0.05, 0.95, f'ρ={rho:.2f}\n{p_text}', 
                   transform=ax.transAxes, fontsize=9, verticalalignment='top',
                   bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
        else:
            # No valid data or constant
            ax.text(0.5, 0.5, 'N/A', transform=ax.transAxes, 
                   ha='center', va='center', fontsize=12, color='gray')
        
        # Labels only on edges
        if i == n_mri - 1:
            ax.set_xlabel(cyt_var, fontsize=10, fontweight='bold')
        else:
            ax.set_xticklabels([])
        
        if j == 0:
            ax.set_ylabel(mri_var, fontsize=10, fontweight='bold')
        else:
            ax.set_yticklabels([])
        
        # Clean up
        ax.tick_params(labelsize=8)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

plt.suptitle('T1 Cytokines vs MRI Outcomes - Correlation Matrix', 
             fontsize=18, fontweight='bold', y=0.995)
plt.tight_layout()
plt.savefig(f'{corrplot_path}/T1_Cytokines_vs_MRI_Pairs_Plot.png', dpi=300, bbox_inches='tight')
print(f"Saved: {corrplot_path}/T1_Cytokines_vs_MRI_Pairs_Plot.png")
plt.show()

# ============================================================================
# ALTERNATIVE: SIMPLIFIED VERSION (FEWER VARIABLES FOR CLARITY)
# ============================================================================

# Select top cytokines and outcomes for cleaner plot
top_cytokines = ['IL-6', 'IL-8', 'TNF-α', 'GFAP', 'NF-L', 'S100B']
top_mri = ['New-Inf', 'Cortical', 'Subcort', 'Right', 'Left']

# Create subset
plot_subset = plot_data_numeric[top_cytokines + top_mri]

n_cyt_sub = len(top_cytokines)
n_mri_sub = len(top_mri)

# Create figure
fig2, axes2 = plt.subplots(n_mri_sub, n_cyt_sub, figsize=(18, 12))

for i, mri_var in enumerate(top_mri):
    for j, cyt_var in enumerate(top_cytokines):
        ax = axes2[i, j]
        
        # Get data
        x = plot_subset[cyt_var].values
        y = plot_subset[mri_var].values
        
        # Remove NaN values
        valid_mask = ~(np.isnan(x) | np.isnan(y))
        x_valid = x[valid_mask]
        y_valid = y[valid_mask]
        
        if len(x_valid) >= 3 and x_valid.std() > 0 and y_valid.std() > 0:
            # Scatter plot
            ax.scatter(x_valid, y_valid, alpha=0.7, s=60, color='#2E86AB', edgecolors='black', linewidth=0.8)
            
            # Add regression line
            z = np.polyfit(x_valid, y_valid, 1)
            p = np.poly1d(z)
            x_line = np.linspace(x_valid.min(), x_valid.max(), 100)
            ax.plot(x_line, p(x_line), "r-", linewidth=2.5, alpha=0.8)
            
            # Calculate Spearman correlation
            rho, p_val = spearmanr(x_valid, y_valid)
            
            # Determine significance
            bonf_alpha = 0.05 / (n_cyt_sub * n_mri_sub)
            if p_val < bonf_alpha:
                sig = '***'
            elif p_val < 0.05:
                sig = '*'
            else:
                sig = ''
            
            # Add text with correlation
            if p_val < 0.001:
                p_text = 'p<0.001'
            else:
                p_text = f'p={p_val:.3f}'
            
            ax.text(0.05, 0.95, f'ρ={rho:.2f}{sig}\n{p_text}', 
                   transform=ax.transAxes, fontsize=10, verticalalignment='top',
                   fontweight='bold',
                   bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.6))
        else:
            ax.text(0.5, 0.5, 'N/A', transform=ax.transAxes, 
                   ha='center', va='center', fontsize=12, color='gray')
        
        # Labels
        if i == n_mri_sub - 1:
            ax.set_xlabel(cyt_var, fontsize=11, fontweight='bold')
        else:
            ax.set_xticklabels([])
        
        if j == 0:
            ax.set_ylabel(mri_var, fontsize=11, fontweight='bold')
        else:
            ax.set_yticklabels([])
        
        ax.tick_params(labelsize=9)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.grid(alpha=0.3, linestyle='--')

plt.suptitle('T1 Key Cytokines vs MRI Outcomes - Correlation Matrix\n(* p<0.05, *** Bonferroni-corrected)', 
             fontsize=16, fontweight='bold', y=0.995)
plt.tight_layout()
plt.savefig(f'{corrplot_path}/T1_Key_Cytokines_vs_MRI_Simplified.png', dpi=300, bbox_inches='tight')
print(f"Saved: {corrplot_path}/T1_Key_Cytokines_vs_MRI_Simplified.png")
plt.show()

print("\n" + "="*80)
print("CORRELATION MATRIX PLOTS CREATED")
print("="*80)
print(f"Location: {corrplot_path}/")
print("\nFiles created:")
print("  - T1_Cytokines_vs_MRI_Pairs_Plot.png (all variables)")
print("  - T1_Key_Cytokines_vs_MRI_Simplified.png (key variables only)")
================================================================================
CREATING CORRELATION MATRIX PLOT FOR T1 CYTOKINES VS MRI OUTCOMES
================================================================================
Variables: 23
Samples: 10
================================================================================

Saved: /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/Correlation_Plots/T1_Cytokines_vs_MRI_Pairs_Plot.png
No description has been provided for this image
Saved: /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/Correlation_Plots/T1_Key_Cytokines_vs_MRI_Simplified.png
No description has been provided for this image
================================================================================
CORRELATION MATRIX PLOTS CREATED
================================================================================
Location: /Users/valar/Documents/Projects/MRI-TAVI-Cytokines/results/Correlation_Plots/

Files created:
  - T1_Cytokines_vs_MRI_Pairs_Plot.png (all variables)
  - T1_Key_Cytokines_vs_MRI_Simplified.png (key variables only)
In [ ]: